/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}
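
/*
 * Dequeue up to "num" completed buffers from the used ring.  For each
 * used element the saved mbuf cookie is returned to the caller, its
 * length recorded, and the descriptor chain is released back to the
 * free list.  Returns the number of buffers actually dequeued; the
 * caller is expected to have checked VIRTQUEUE_NUSED() beforehand.
 */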
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t)uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t)uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}
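
/*
 * virtio_tso_fix_cksum() below patches the TCP pseudo-header checksum
 * in place: for IPv4 it recomputes the IP header checksum and derives
 * the payload length from total_length, for IPv6 it takes payload_len
 * directly, then folds the big-endian payload length into the existing
 * checksum with an end-around carry.  Only the common case of an
 * unfragmented header in the first segment is handled.
 */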
/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid unnecessary write operations, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)
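
/*
 * Enqueue one packet for transmission.  Three descriptor layouts are
 * supported, chosen by the caller:
 *  - can_push:     the virtio-net header is prepended into the mbuf
 *                  headroom, so header and data share one descriptor;
 *  - use_indirect: a single ring slot points to an indirect descriptor
 *                  table in the reserved region holding the header plus
 *                  all data segments;
 *  - default:      the first slot carries the header from the reserved
 *                  region and each data segment takes one further slot.
 */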
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() counts the hdr size in the pkt
		 * length, which is wrong. The subtraction below restores
		 * the correct pkt size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}
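
/*
 * Virtio-net lays out virtqueues in RX/TX pairs: receive queue N uses
 * vq index 2*N + VTNET_SQ_RQ_QUEUE_IDX and transmit queue N uses
 * 2*N + VTNET_SQ_TQ_QUEUE_IDX, which is why the setup routines below
 * translate the ethdev queue index before looking up hw->vqs[].
 */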
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}
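
/*
 * The simple Tx path is only kept when the application requests
 * ETH_TXQ_FLAGS_NOMULTSEGS and ETH_TXQ_FLAGS_NOOFFLOADS; otherwise the
 * standard transmit routine is used.  If tx_free_thresh is left at zero
 * it defaults to min(ring size / 4, DEFAULT_TX_FREE_THRESH).
 */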
/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}
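
/*
 * Per-queue size histogram: bin 0 counts packets shorter than 64 bytes,
 * bin 1 exactly 64 bytes, bins 2-5 the power-of-two ranges 65-127,
 * 128-255, 256-511 and 512-1023 (selected via __builtin_clz), bin 6
 * 1024-1518 bytes and bin 7 anything larger.
 */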
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
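
/*
 * Standard (non-mergeable) receive routine: drain up to nb_pkts
 * completed buffers from the used ring, strip the virtio-net header,
 * optionally apply VLAN stripping and RX offload flags, then refill the
 * ring with fresh mbufs and kick the host if it expects a notification.
 */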
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
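
/*
 * Receive routine used when mergeable RX buffers are negotiated: a
 * packet may span several receive buffers, the header's num_buffers
 * field telling how many.  The extra buffers are dequeued and chained
 * onto the first mbuf before the packet is handed to the application.
 */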
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
						rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
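
/*
 * Transmit routine: for each packet it first tries to push the
 * virtio-net header into the mbuf headroom (ANY_LAYOUT or VERSION_1,
 * single direct segment, enough headroom, suitable alignment), then
 * falls back to an indirect descriptor table, and finally to the
 * default layout of one header slot plus one slot per segment.  Used
 * descriptors are reclaimed before giving up when the ring runs short
 * of free entries.
 */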
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* Positive value indicates it needs free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}