/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif


#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}


static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid write operations when not needed, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
		 * which is wrong. The subtraction below restores the correct
		 * pkt size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;
	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss length in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO |
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* A positive value indicates it needs free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					   "No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}