1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdint.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <errno.h> 39 40 #include <rte_cycles.h> 41 #include <rte_memory.h> 42 #include <rte_memzone.h> 43 #include <rte_branch_prediction.h> 44 #include <rte_mempool.h> 45 #include <rte_malloc.h> 46 #include <rte_mbuf.h> 47 #include <rte_ether.h> 48 #include <rte_ethdev.h> 49 #include <rte_prefetch.h> 50 #include <rte_string_fns.h> 51 #include <rte_errno.h> 52 #include <rte_byteorder.h> 53 #include <rte_cpuflags.h> 54 #include <rte_net.h> 55 #include <rte_ip.h> 56 #include <rte_udp.h> 57 #include <rte_tcp.h> 58 59 #include "virtio_logs.h" 60 #include "virtio_ethdev.h" 61 #include "virtio_pci.h" 62 #include "virtqueue.h" 63 #include "virtio_rxtx.h" 64 65 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 66 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len) 67 #else 68 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0) 69 #endif 70 71 72 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ 73 ETH_TXQ_FLAGS_NOOFFLOADS) 74 75 int 76 virtio_dev_rx_queue_done(void *rxq, uint16_t offset) 77 { 78 struct virtnet_rx *rxvq = rxq; 79 struct virtqueue *vq = rxvq->vq; 80 81 return VIRTQUEUE_NUSED(vq) >= offset; 82 } 83 84 static void 85 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) 86 { 87 struct vring_desc *dp, *dp_tail; 88 struct vq_desc_extra *dxp; 89 uint16_t desc_idx_last = desc_idx; 90 91 dp = &vq->vq_ring.desc[desc_idx]; 92 dxp = &vq->vq_descx[desc_idx]; 93 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); 94 if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { 95 while (dp->flags & VRING_DESC_F_NEXT) { 96 desc_idx_last = dp->next; 97 dp = &vq->vq_ring.desc[dp->next]; 98 } 99 } 100 dxp->ndescs = 0; 101 102 /* 103 * We must append the existing free chain, if any, to the end of 104 * newly freed chain. If the virtqueue was completely used, then 105 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 106 */ 107 if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { 108 vq->vq_desc_head_idx = desc_idx; 109 } else { 110 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; 111 dp_tail->next = desc_idx; 112 } 113 114 vq->vq_desc_tail_idx = desc_idx_last; 115 dp->next = VQ_RING_DESC_CHAIN_END; 116 } 117 118 static uint16_t 119 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, 120 uint32_t *len, uint16_t num) 121 { 122 struct vring_used_elem *uep; 123 struct rte_mbuf *cookie; 124 uint16_t used_idx, desc_idx; 125 uint16_t i; 126 127 /* Caller does the check */ 128 for (i = 0; i < num ; i++) { 129 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 130 uep = &vq->vq_ring.used->ring[used_idx]; 131 desc_idx = (uint16_t) uep->id; 132 len[i] = uep->len; 133 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; 134 135 if (unlikely(cookie == NULL)) { 136 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", 137 vq->vq_used_cons_idx); 138 break; 139 } 140 141 rte_prefetch0(cookie); 142 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); 143 rx_pkts[i] = cookie; 144 vq->vq_used_cons_idx++; 145 vq_ring_free_chain(vq, desc_idx); 146 vq->vq_descx[desc_idx].cookie = NULL; 147 } 148 149 return i; 150 } 151 152 #ifndef DEFAULT_TX_FREE_THRESH 153 #define DEFAULT_TX_FREE_THRESH 32 154 #endif 155 156 /* Cleanup from completed transmits. */ 157 static void 158 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) 159 { 160 uint16_t i, used_idx, desc_idx; 161 for (i = 0; i < num; i++) { 162 struct vring_used_elem *uep; 163 struct vq_desc_extra *dxp; 164 165 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 166 uep = &vq->vq_ring.used->ring[used_idx]; 167 168 desc_idx = (uint16_t) uep->id; 169 dxp = &vq->vq_descx[desc_idx]; 170 vq->vq_used_cons_idx++; 171 vq_ring_free_chain(vq, desc_idx); 172 173 if (dxp->cookie != NULL) { 174 rte_pktmbuf_free(dxp->cookie); 175 dxp->cookie = NULL; 176 } 177 } 178 } 179 180 181 static inline int 182 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) 183 { 184 struct vq_desc_extra *dxp; 185 struct virtio_hw *hw = vq->hw; 186 struct vring_desc *start_dp; 187 uint16_t needed = 1; 188 uint16_t head_idx, idx; 189 190 if (unlikely(vq->vq_free_cnt == 0)) 191 return -ENOSPC; 192 if (unlikely(vq->vq_free_cnt < needed)) 193 return -EMSGSIZE; 194 195 head_idx = vq->vq_desc_head_idx; 196 if (unlikely(head_idx >= vq->vq_nentries)) 197 return -EFAULT; 198 199 idx = head_idx; 200 dxp = &vq->vq_descx[idx]; 201 dxp->cookie = (void *)cookie; 202 dxp->ndescs = needed; 203 204 start_dp = vq->vq_ring.desc; 205 start_dp[idx].addr = 206 VIRTIO_MBUF_ADDR(cookie, vq) + 207 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; 208 start_dp[idx].len = 209 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; 210 start_dp[idx].flags = VRING_DESC_F_WRITE; 211 idx = start_dp[idx].next; 212 vq->vq_desc_head_idx = idx; 213 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 214 vq->vq_desc_tail_idx = idx; 215 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 216 vq_update_avail_ring(vq, head_idx); 217 218 return 0; 219 } 220 221 /* When doing TSO, the IP length is not included in the pseudo header 222 * checksum of the packet given to the PMD, but for virtio it is 223 * expected. 224 */ 225 static void 226 virtio_tso_fix_cksum(struct rte_mbuf *m) 227 { 228 /* common case: header is not fragmented */ 229 if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + 230 m->l4_len)) { 231 struct ipv4_hdr *iph; 232 struct ipv6_hdr *ip6h; 233 struct tcp_hdr *th; 234 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; 235 uint32_t tmp; 236 237 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); 238 th = RTE_PTR_ADD(iph, m->l3_len); 239 if ((iph->version_ihl >> 4) == 4) { 240 iph->hdr_checksum = 0; 241 iph->hdr_checksum = rte_ipv4_cksum(iph); 242 ip_len = iph->total_length; 243 ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - 244 m->l3_len); 245 } else { 246 ip6h = (struct ipv6_hdr *)iph; 247 ip_paylen = ip6h->payload_len; 248 } 249 250 /* calculate the new phdr checksum not including ip_paylen */ 251 prev_cksum = th->cksum; 252 tmp = prev_cksum; 253 tmp += ip_paylen; 254 tmp = (tmp & 0xffff) + (tmp >> 16); 255 new_cksum = tmp; 256 257 /* replace it in the packet */ 258 th->cksum = new_cksum; 259 } 260 } 261 262 static inline int 263 tx_offload_enabled(struct virtio_hw *hw) 264 { 265 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || 266 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || 267 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); 268 } 269 270 /* avoid write operation when necessary, to lessen cache issues */ 271 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 272 if ((var) != (val)) \ 273 (var) = (val); \ 274 } while (0) 275 276 static inline void 277 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, 278 uint16_t needed, int use_indirect, int can_push) 279 { 280 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; 281 struct vq_desc_extra *dxp; 282 struct virtqueue *vq = txvq->vq; 283 struct vring_desc *start_dp; 284 uint16_t seg_num = cookie->nb_segs; 285 uint16_t head_idx, idx; 286 uint16_t head_size = vq->hw->vtnet_hdr_size; 287 struct virtio_net_hdr *hdr; 288 int offload; 289 290 offload = tx_offload_enabled(vq->hw); 291 head_idx = vq->vq_desc_head_idx; 292 idx = head_idx; 293 dxp = &vq->vq_descx[idx]; 294 dxp->cookie = (void *)cookie; 295 dxp->ndescs = needed; 296 297 start_dp = vq->vq_ring.desc; 298 299 if (can_push) { 300 /* prepend cannot fail, checked by caller */ 301 hdr = (struct virtio_net_hdr *) 302 rte_pktmbuf_prepend(cookie, head_size); 303 /* if offload disabled, it is not zeroed below, do it now */ 304 if (offload == 0) 305 memset(hdr, 0, head_size); 306 } else if (use_indirect) { 307 /* setup tx ring slot to point to indirect 308 * descriptor list stored in reserved region. 309 * 310 * the first slot in indirect ring is already preset 311 * to point to the header in reserved region 312 */ 313 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 314 RTE_PTR_DIFF(&txr[idx].tx_indir, txr); 315 start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); 316 start_dp[idx].flags = VRING_DESC_F_INDIRECT; 317 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 318 319 /* loop below will fill in rest of the indirect elements */ 320 start_dp = txr[idx].tx_indir; 321 idx = 1; 322 } else { 323 /* setup first tx ring slot to point to header 324 * stored in reserved region. 325 */ 326 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 327 RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); 328 start_dp[idx].len = vq->hw->vtnet_hdr_size; 329 start_dp[idx].flags = VRING_DESC_F_NEXT; 330 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 331 332 idx = start_dp[idx].next; 333 } 334 335 /* Checksum Offload / TSO */ 336 if (offload) { 337 if (cookie->ol_flags & PKT_TX_TCP_SEG) 338 cookie->ol_flags |= PKT_TX_TCP_CKSUM; 339 340 switch (cookie->ol_flags & PKT_TX_L4_MASK) { 341 case PKT_TX_UDP_CKSUM: 342 hdr->csum_start = cookie->l2_len + cookie->l3_len; 343 hdr->csum_offset = offsetof(struct udp_hdr, 344 dgram_cksum); 345 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 346 break; 347 348 case PKT_TX_TCP_CKSUM: 349 hdr->csum_start = cookie->l2_len + cookie->l3_len; 350 hdr->csum_offset = offsetof(struct tcp_hdr, cksum); 351 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 352 break; 353 354 default: 355 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); 356 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); 357 ASSIGN_UNLESS_EQUAL(hdr->flags, 0); 358 break; 359 } 360 361 /* TCP Segmentation Offload */ 362 if (cookie->ol_flags & PKT_TX_TCP_SEG) { 363 virtio_tso_fix_cksum(cookie); 364 hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? 365 VIRTIO_NET_HDR_GSO_TCPV6 : 366 VIRTIO_NET_HDR_GSO_TCPV4; 367 hdr->gso_size = cookie->tso_segsz; 368 hdr->hdr_len = 369 cookie->l2_len + 370 cookie->l3_len + 371 cookie->l4_len; 372 } else { 373 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); 374 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); 375 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); 376 } 377 } 378 379 do { 380 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); 381 start_dp[idx].len = cookie->data_len; 382 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; 383 idx = start_dp[idx].next; 384 } while ((cookie = cookie->next) != NULL); 385 386 if (use_indirect) 387 idx = vq->vq_ring.desc[head_idx].next; 388 389 vq->vq_desc_head_idx = idx; 390 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 391 vq->vq_desc_tail_idx = idx; 392 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 393 vq_update_avail_ring(vq, head_idx); 394 } 395 396 void 397 virtio_dev_cq_start(struct rte_eth_dev *dev) 398 { 399 struct virtio_hw *hw = dev->data->dev_private; 400 401 if (hw->cvq && hw->cvq->vq) { 402 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); 403 } 404 } 405 406 int 407 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, 408 uint16_t queue_idx, 409 uint16_t nb_desc, 410 unsigned int socket_id __rte_unused, 411 __rte_unused const struct rte_eth_rxconf *rx_conf, 412 struct rte_mempool *mp) 413 { 414 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; 415 struct virtio_hw *hw = dev->data->dev_private; 416 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 417 struct virtnet_rx *rxvq; 418 int error, nbufs; 419 struct rte_mbuf *m; 420 uint16_t desc_idx; 421 422 PMD_INIT_FUNC_TRACE(); 423 424 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 425 nb_desc = vq->vq_nentries; 426 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 427 428 rxvq = &vq->rxq; 429 rxvq->queue_id = queue_idx; 430 rxvq->mpool = mp; 431 if (rxvq->mpool == NULL) { 432 rte_exit(EXIT_FAILURE, 433 "Cannot allocate mbufs for rx virtqueue"); 434 } 435 dev->data->rx_queues[queue_idx] = rxvq; 436 437 438 /* Allocate blank mbufs for the each rx descriptor */ 439 nbufs = 0; 440 error = ENOSPC; 441 442 if (hw->use_simple_rxtx) { 443 for (desc_idx = 0; desc_idx < vq->vq_nentries; 444 desc_idx++) { 445 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 446 vq->vq_ring.desc[desc_idx].flags = 447 VRING_DESC_F_WRITE; 448 } 449 } 450 451 memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); 452 for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; 453 desc_idx++) { 454 vq->sw_ring[vq->vq_nentries + desc_idx] = 455 &rxvq->fake_mbuf; 456 } 457 458 while (!virtqueue_full(vq)) { 459 m = rte_mbuf_raw_alloc(rxvq->mpool); 460 if (m == NULL) 461 break; 462 463 /* Enqueue allocated buffers */ 464 if (hw->use_simple_rxtx) 465 error = virtqueue_enqueue_recv_refill_simple(vq, m); 466 else 467 error = virtqueue_enqueue_recv_refill(vq, m); 468 469 if (error) { 470 rte_pktmbuf_free(m); 471 break; 472 } 473 nbufs++; 474 } 475 476 vq_update_avail_idx(vq); 477 478 PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); 479 480 virtio_rxq_vec_setup(rxvq); 481 482 VIRTQUEUE_DUMP(vq); 483 484 return 0; 485 } 486 487 static void 488 virtio_update_rxtx_handler(struct rte_eth_dev *dev, 489 const struct rte_eth_txconf *tx_conf) 490 { 491 uint8_t use_simple_rxtx = 0; 492 struct virtio_hw *hw = dev->data->dev_private; 493 494 #if defined RTE_ARCH_X86 495 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3)) 496 use_simple_rxtx = 1; 497 #elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM 498 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) 499 use_simple_rxtx = 1; 500 #endif 501 /* Use simple rx/tx func if single segment and no offloads */ 502 if (use_simple_rxtx && 503 (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && 504 !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { 505 PMD_INIT_LOG(INFO, "Using simple rx/tx path"); 506 dev->tx_pkt_burst = virtio_xmit_pkts_simple; 507 dev->rx_pkt_burst = virtio_recv_pkts_vec; 508 hw->use_simple_rxtx = use_simple_rxtx; 509 } 510 } 511 512 /* 513 * struct rte_eth_dev *dev: Used to update dev 514 * uint16_t nb_desc: Defaults to values read from config space 515 * unsigned int socket_id: Used to allocate memzone 516 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine 517 * uint16_t queue_idx: Just used as an index in dev txq list 518 */ 519 int 520 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, 521 uint16_t queue_idx, 522 uint16_t nb_desc, 523 unsigned int socket_id __rte_unused, 524 const struct rte_eth_txconf *tx_conf) 525 { 526 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; 527 struct virtio_hw *hw = dev->data->dev_private; 528 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 529 struct virtnet_tx *txvq; 530 uint16_t tx_free_thresh; 531 uint16_t desc_idx; 532 533 PMD_INIT_FUNC_TRACE(); 534 535 virtio_update_rxtx_handler(dev, tx_conf); 536 537 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 538 nb_desc = vq->vq_nentries; 539 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 540 541 txvq = &vq->txq; 542 txvq->queue_id = queue_idx; 543 544 tx_free_thresh = tx_conf->tx_free_thresh; 545 if (tx_free_thresh == 0) 546 tx_free_thresh = 547 RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); 548 549 if (tx_free_thresh >= (vq->vq_nentries - 3)) { 550 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " 551 "number of TX entries minus 3 (%u)." 552 " (tx_free_thresh=%u port=%u queue=%u)\n", 553 vq->vq_nentries - 3, 554 tx_free_thresh, dev->data->port_id, queue_idx); 555 return -EINVAL; 556 } 557 558 vq->vq_free_thresh = tx_free_thresh; 559 560 if (hw->use_simple_rxtx) { 561 uint16_t mid_idx = vq->vq_nentries >> 1; 562 563 for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { 564 vq->vq_ring.avail->ring[desc_idx] = 565 desc_idx + mid_idx; 566 vq->vq_ring.desc[desc_idx + mid_idx].next = 567 desc_idx; 568 vq->vq_ring.desc[desc_idx + mid_idx].addr = 569 txvq->virtio_net_hdr_mem + 570 offsetof(struct virtio_tx_region, tx_hdr); 571 vq->vq_ring.desc[desc_idx + mid_idx].len = 572 vq->hw->vtnet_hdr_size; 573 vq->vq_ring.desc[desc_idx + mid_idx].flags = 574 VRING_DESC_F_NEXT; 575 vq->vq_ring.desc[desc_idx].flags = 0; 576 } 577 for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; 578 desc_idx++) 579 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 580 } 581 582 VIRTQUEUE_DUMP(vq); 583 584 dev->data->tx_queues[queue_idx] = txvq; 585 return 0; 586 } 587 588 static void 589 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) 590 { 591 int error; 592 /* 593 * Requeue the discarded mbuf. This should always be 594 * successful since it was just dequeued. 595 */ 596 error = virtqueue_enqueue_recv_refill(vq, m); 597 if (unlikely(error)) { 598 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); 599 rte_pktmbuf_free(m); 600 } 601 } 602 603 static void 604 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) 605 { 606 uint32_t s = mbuf->pkt_len; 607 struct ether_addr *ea; 608 609 if (s == 64) { 610 stats->size_bins[1]++; 611 } else if (s > 64 && s < 1024) { 612 uint32_t bin; 613 614 /* count zeros, and offset into correct bin */ 615 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; 616 stats->size_bins[bin]++; 617 } else { 618 if (s < 64) 619 stats->size_bins[0]++; 620 else if (s < 1519) 621 stats->size_bins[6]++; 622 else if (s >= 1519) 623 stats->size_bins[7]++; 624 } 625 626 ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); 627 if (is_multicast_ether_addr(ea)) { 628 if (is_broadcast_ether_addr(ea)) 629 stats->broadcast++; 630 else 631 stats->multicast++; 632 } 633 } 634 635 /* Optionally fill offload information in structure */ 636 static int 637 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) 638 { 639 struct rte_net_hdr_lens hdr_lens; 640 uint32_t hdrlen, ptype; 641 int l4_supported = 0; 642 643 /* nothing to do */ 644 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 645 return 0; 646 647 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 648 649 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 650 m->packet_type = ptype; 651 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 652 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 653 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 654 l4_supported = 1; 655 656 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 657 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 658 if (hdr->csum_start <= hdrlen && l4_supported) { 659 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 660 } else { 661 /* Unknown proto or tunnel, do sw cksum. We can assume 662 * the cksum field is in the first segment since the 663 * buffers we provided to the host are large enough. 664 * In case of SCTP, this will be wrong since it's a CRC 665 * but there's nothing we can do. 666 */ 667 uint16_t csum, off; 668 669 rte_raw_cksum_mbuf(m, hdr->csum_start, 670 rte_pktmbuf_pkt_len(m) - hdr->csum_start, 671 &csum); 672 if (likely(csum != 0xffff)) 673 csum = ~csum; 674 off = hdr->csum_offset + hdr->csum_start; 675 if (rte_pktmbuf_data_len(m) >= off + 1) 676 *rte_pktmbuf_mtod_offset(m, uint16_t *, 677 off) = csum; 678 } 679 } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { 680 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; 681 } 682 683 /* GSO request, save required information in mbuf */ 684 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 685 /* Check unsupported modes */ 686 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || 687 (hdr->gso_size == 0)) { 688 return -EINVAL; 689 } 690 691 /* Update mss lengthes in mbuf */ 692 m->tso_segsz = hdr->gso_size; 693 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 694 case VIRTIO_NET_HDR_GSO_TCPV4: 695 case VIRTIO_NET_HDR_GSO_TCPV6: 696 m->ol_flags |= PKT_RX_LRO | \ 697 PKT_RX_L4_CKSUM_NONE; 698 break; 699 default: 700 return -EINVAL; 701 } 702 } 703 704 return 0; 705 } 706 707 static inline int 708 rx_offload_enabled(struct virtio_hw *hw) 709 { 710 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || 711 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 712 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); 713 } 714 715 #define VIRTIO_MBUF_BURST_SZ 64 716 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) 717 uint16_t 718 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 719 { 720 struct virtnet_rx *rxvq = rx_queue; 721 struct virtqueue *vq = rxvq->vq; 722 struct virtio_hw *hw; 723 struct rte_mbuf *rxm, *new_mbuf; 724 uint16_t nb_used, num, nb_rx; 725 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 726 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 727 int error; 728 uint32_t i, nb_enqueued; 729 uint32_t hdr_size; 730 int offload; 731 struct virtio_net_hdr *hdr; 732 733 nb_used = VIRTQUEUE_NUSED(vq); 734 735 virtio_rmb(); 736 737 num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); 738 num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); 739 if (likely(num > DESC_PER_CACHELINE)) 740 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); 741 742 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); 743 PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); 744 745 hw = vq->hw; 746 nb_rx = 0; 747 nb_enqueued = 0; 748 hdr_size = hw->vtnet_hdr_size; 749 offload = rx_offload_enabled(hw); 750 751 for (i = 0; i < num ; i++) { 752 rxm = rcv_pkts[i]; 753 754 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]); 755 756 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { 757 PMD_RX_LOG(ERR, "Packet drop"); 758 nb_enqueued++; 759 virtio_discard_rxbuf(vq, rxm); 760 rxvq->stats.errors++; 761 continue; 762 } 763 764 rxm->port = rxvq->port_id; 765 rxm->data_off = RTE_PKTMBUF_HEADROOM; 766 rxm->ol_flags = 0; 767 rxm->vlan_tci = 0; 768 769 rxm->nb_segs = 1; 770 rxm->next = NULL; 771 rxm->pkt_len = (uint32_t)(len[i] - hdr_size); 772 rxm->data_len = (uint16_t)(len[i] - hdr_size); 773 774 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr + 775 RTE_PKTMBUF_HEADROOM - hdr_size); 776 777 if (hw->vlan_strip) 778 rte_vlan_strip(rxm); 779 780 if (offload && virtio_rx_offload(rxm, hdr) < 0) { 781 virtio_discard_rxbuf(vq, rxm); 782 rxvq->stats.errors++; 783 continue; 784 } 785 786 VIRTIO_DUMP_PACKET(rxm, rxm->data_len); 787 788 rx_pkts[nb_rx++] = rxm; 789 790 rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len; 791 virtio_update_packet_stats(&rxvq->stats, rxm); 792 } 793 794 rxvq->stats.packets += nb_rx; 795 796 /* Allocate new mbuf for the used descriptor */ 797 error = ENOSPC; 798 while (likely(!virtqueue_full(vq))) { 799 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 800 if (unlikely(new_mbuf == NULL)) { 801 struct rte_eth_dev *dev 802 = &rte_eth_devices[rxvq->port_id]; 803 dev->data->rx_mbuf_alloc_failed++; 804 break; 805 } 806 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 807 if (unlikely(error)) { 808 rte_pktmbuf_free(new_mbuf); 809 break; 810 } 811 nb_enqueued++; 812 } 813 814 if (likely(nb_enqueued)) { 815 vq_update_avail_idx(vq); 816 817 if (unlikely(virtqueue_kick_prepare(vq))) { 818 virtqueue_notify(vq); 819 PMD_RX_LOG(DEBUG, "Notified"); 820 } 821 } 822 823 return nb_rx; 824 } 825 826 uint16_t 827 virtio_recv_mergeable_pkts(void *rx_queue, 828 struct rte_mbuf **rx_pkts, 829 uint16_t nb_pkts) 830 { 831 struct virtnet_rx *rxvq = rx_queue; 832 struct virtqueue *vq = rxvq->vq; 833 struct virtio_hw *hw; 834 struct rte_mbuf *rxm, *new_mbuf; 835 uint16_t nb_used, num, nb_rx; 836 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 837 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 838 struct rte_mbuf *prev; 839 int error; 840 uint32_t i, nb_enqueued; 841 uint32_t seg_num; 842 uint16_t extra_idx; 843 uint32_t seg_res; 844 uint32_t hdr_size; 845 int offload; 846 847 nb_used = VIRTQUEUE_NUSED(vq); 848 849 virtio_rmb(); 850 851 PMD_RX_LOG(DEBUG, "used:%d", nb_used); 852 853 hw = vq->hw; 854 nb_rx = 0; 855 i = 0; 856 nb_enqueued = 0; 857 seg_num = 0; 858 extra_idx = 0; 859 seg_res = 0; 860 hdr_size = hw->vtnet_hdr_size; 861 offload = rx_offload_enabled(hw); 862 863 while (i < nb_used) { 864 struct virtio_net_hdr_mrg_rxbuf *header; 865 866 if (nb_rx == nb_pkts) 867 break; 868 869 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); 870 if (num != 1) 871 continue; 872 873 i++; 874 875 PMD_RX_LOG(DEBUG, "dequeue:%d", num); 876 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); 877 878 rxm = rcv_pkts[0]; 879 880 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { 881 PMD_RX_LOG(ERR, "Packet drop"); 882 nb_enqueued++; 883 virtio_discard_rxbuf(vq, rxm); 884 rxvq->stats.errors++; 885 continue; 886 } 887 888 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr + 889 RTE_PKTMBUF_HEADROOM - hdr_size); 890 seg_num = header->num_buffers; 891 892 if (seg_num == 0) 893 seg_num = 1; 894 895 rxm->data_off = RTE_PKTMBUF_HEADROOM; 896 rxm->nb_segs = seg_num; 897 rxm->next = NULL; 898 rxm->ol_flags = 0; 899 rxm->vlan_tci = 0; 900 rxm->pkt_len = (uint32_t)(len[0] - hdr_size); 901 rxm->data_len = (uint16_t)(len[0] - hdr_size); 902 903 rxm->port = rxvq->port_id; 904 rx_pkts[nb_rx] = rxm; 905 prev = rxm; 906 907 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { 908 virtio_discard_rxbuf(vq, rxm); 909 rxvq->stats.errors++; 910 continue; 911 } 912 913 seg_res = seg_num - 1; 914 915 while (seg_res != 0) { 916 /* 917 * Get extra segments for current uncompleted packet. 918 */ 919 uint16_t rcv_cnt = 920 RTE_MIN(seg_res, RTE_DIM(rcv_pkts)); 921 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { 922 uint32_t rx_num = 923 virtqueue_dequeue_burst_rx(vq, 924 rcv_pkts, len, rcv_cnt); 925 i += rx_num; 926 rcv_cnt = rx_num; 927 } else { 928 PMD_RX_LOG(ERR, 929 "No enough segments for packet."); 930 nb_enqueued++; 931 virtio_discard_rxbuf(vq, rxm); 932 rxvq->stats.errors++; 933 break; 934 } 935 936 extra_idx = 0; 937 938 while (extra_idx < rcv_cnt) { 939 rxm = rcv_pkts[extra_idx]; 940 941 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; 942 rxm->next = NULL; 943 rxm->pkt_len = (uint32_t)(len[extra_idx]); 944 rxm->data_len = (uint16_t)(len[extra_idx]); 945 946 if (prev) 947 prev->next = rxm; 948 949 prev = rxm; 950 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len; 951 extra_idx++; 952 }; 953 seg_res -= rcv_cnt; 954 } 955 956 if (hw->vlan_strip) 957 rte_vlan_strip(rx_pkts[nb_rx]); 958 959 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], 960 rx_pkts[nb_rx]->data_len); 961 962 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; 963 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); 964 nb_rx++; 965 } 966 967 rxvq->stats.packets += nb_rx; 968 969 /* Allocate new mbuf for the used descriptor */ 970 error = ENOSPC; 971 while (likely(!virtqueue_full(vq))) { 972 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 973 if (unlikely(new_mbuf == NULL)) { 974 struct rte_eth_dev *dev 975 = &rte_eth_devices[rxvq->port_id]; 976 dev->data->rx_mbuf_alloc_failed++; 977 break; 978 } 979 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 980 if (unlikely(error)) { 981 rte_pktmbuf_free(new_mbuf); 982 break; 983 } 984 nb_enqueued++; 985 } 986 987 if (likely(nb_enqueued)) { 988 vq_update_avail_idx(vq); 989 990 if (unlikely(virtqueue_kick_prepare(vq))) { 991 virtqueue_notify(vq); 992 PMD_RX_LOG(DEBUG, "Notified"); 993 } 994 } 995 996 return nb_rx; 997 } 998 999 uint16_t 1000 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1001 { 1002 struct virtnet_tx *txvq = tx_queue; 1003 struct virtqueue *vq = txvq->vq; 1004 struct virtio_hw *hw = vq->hw; 1005 uint16_t hdr_size = hw->vtnet_hdr_size; 1006 uint16_t nb_used, nb_tx; 1007 int error; 1008 1009 if (unlikely(nb_pkts < 1)) 1010 return nb_pkts; 1011 1012 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); 1013 nb_used = VIRTQUEUE_NUSED(vq); 1014 1015 virtio_rmb(); 1016 if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) 1017 virtio_xmit_cleanup(vq, nb_used); 1018 1019 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { 1020 struct rte_mbuf *txm = tx_pkts[nb_tx]; 1021 int can_push = 0, use_indirect = 0, slots, need; 1022 1023 /* Do VLAN tag insertion */ 1024 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { 1025 error = rte_vlan_insert(&txm); 1026 if (unlikely(error)) { 1027 rte_pktmbuf_free(txm); 1028 continue; 1029 } 1030 } 1031 1032 /* optimize ring usage */ 1033 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || 1034 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && 1035 rte_mbuf_refcnt_read(txm) == 1 && 1036 RTE_MBUF_DIRECT(txm) && 1037 txm->nb_segs == 1 && 1038 rte_pktmbuf_headroom(txm) >= hdr_size && 1039 rte_is_aligned(rte_pktmbuf_mtod(txm, char *), 1040 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) 1041 can_push = 1; 1042 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) && 1043 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT) 1044 use_indirect = 1; 1045 1046 /* How many main ring entries are needed to this Tx? 1047 * any_layout => number of segments 1048 * indirect => 1 1049 * default => number of segments + 1 1050 */ 1051 slots = use_indirect ? 1 : (txm->nb_segs + !can_push); 1052 need = slots - vq->vq_free_cnt; 1053 1054 /* Positive value indicates it need free vring descriptors */ 1055 if (unlikely(need > 0)) { 1056 nb_used = VIRTQUEUE_NUSED(vq); 1057 virtio_rmb(); 1058 need = RTE_MIN(need, (int)nb_used); 1059 1060 virtio_xmit_cleanup(vq, need); 1061 need = slots - vq->vq_free_cnt; 1062 if (unlikely(need > 0)) { 1063 PMD_TX_LOG(ERR, 1064 "No free tx descriptors to transmit"); 1065 break; 1066 } 1067 } 1068 1069 /* Enqueue Packet buffers */ 1070 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); 1071 1072 txvq->stats.bytes += txm->pkt_len; 1073 virtio_update_packet_stats(&txvq->stats, txm); 1074 } 1075 1076 txvq->stats.packets += nb_tx; 1077 1078 if (likely(nb_tx)) { 1079 vq_update_avail_idx(vq); 1080 1081 if (unlikely(virtqueue_kick_prepare(vq))) { 1082 virtqueue_notify(vq); 1083 PMD_TX_LOG(DEBUG, "Notified backend after xmit"); 1084 } 1085 } 1086 1087 return nb_tx; 1088 } 1089