1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdint.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <errno.h> 39 40 #include <rte_cycles.h> 41 #include <rte_memory.h> 42 #include <rte_memzone.h> 43 #include <rte_branch_prediction.h> 44 #include <rte_mempool.h> 45 #include <rte_malloc.h> 46 #include <rte_mbuf.h> 47 #include <rte_ether.h> 48 #include <rte_ethdev.h> 49 #include <rte_prefetch.h> 50 #include <rte_string_fns.h> 51 #include <rte_errno.h> 52 #include <rte_byteorder.h> 53 #include <rte_cpuflags.h> 54 #include <rte_net.h> 55 #include <rte_ip.h> 56 #include <rte_udp.h> 57 #include <rte_tcp.h> 58 59 #include "virtio_logs.h" 60 #include "virtio_ethdev.h" 61 #include "virtio_pci.h" 62 #include "virtqueue.h" 63 #include "virtio_rxtx.h" 64 65 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 66 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len) 67 #else 68 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0) 69 #endif 70 71 72 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ 73 ETH_TXQ_FLAGS_NOOFFLOADS) 74 75 int 76 virtio_dev_rx_queue_done(void *rxq, uint16_t offset) 77 { 78 struct virtnet_rx *rxvq = rxq; 79 struct virtqueue *vq = rxvq->vq; 80 81 return VIRTQUEUE_NUSED(vq) >= offset; 82 } 83 84 static void 85 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) 86 { 87 struct vring_desc *dp, *dp_tail; 88 struct vq_desc_extra *dxp; 89 uint16_t desc_idx_last = desc_idx; 90 91 dp = &vq->vq_ring.desc[desc_idx]; 92 dxp = &vq->vq_descx[desc_idx]; 93 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); 94 if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { 95 while (dp->flags & VRING_DESC_F_NEXT) { 96 desc_idx_last = dp->next; 97 dp = &vq->vq_ring.desc[dp->next]; 98 } 99 } 100 dxp->ndescs = 0; 101 102 /* 103 * We must append the existing free chain, if any, to the end of 104 * newly freed chain. If the virtqueue was completely used, then 105 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 106 */ 107 if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { 108 vq->vq_desc_head_idx = desc_idx; 109 } else { 110 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; 111 dp_tail->next = desc_idx; 112 } 113 114 vq->vq_desc_tail_idx = desc_idx_last; 115 dp->next = VQ_RING_DESC_CHAIN_END; 116 } 117 118 static uint16_t 119 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, 120 uint32_t *len, uint16_t num) 121 { 122 struct vring_used_elem *uep; 123 struct rte_mbuf *cookie; 124 uint16_t used_idx, desc_idx; 125 uint16_t i; 126 127 /* Caller does the check */ 128 for (i = 0; i < num ; i++) { 129 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 130 uep = &vq->vq_ring.used->ring[used_idx]; 131 desc_idx = (uint16_t) uep->id; 132 len[i] = uep->len; 133 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; 134 135 if (unlikely(cookie == NULL)) { 136 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", 137 vq->vq_used_cons_idx); 138 break; 139 } 140 141 rte_prefetch0(cookie); 142 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); 143 rx_pkts[i] = cookie; 144 vq->vq_used_cons_idx++; 145 vq_ring_free_chain(vq, desc_idx); 146 vq->vq_descx[desc_idx].cookie = NULL; 147 } 148 149 return i; 150 } 151 152 #ifndef DEFAULT_TX_FREE_THRESH 153 #define DEFAULT_TX_FREE_THRESH 32 154 #endif 155 156 /* Cleanup from completed transmits. */ 157 static void 158 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) 159 { 160 uint16_t i, used_idx, desc_idx; 161 for (i = 0; i < num; i++) { 162 struct vring_used_elem *uep; 163 struct vq_desc_extra *dxp; 164 165 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 166 uep = &vq->vq_ring.used->ring[used_idx]; 167 168 desc_idx = (uint16_t) uep->id; 169 dxp = &vq->vq_descx[desc_idx]; 170 vq->vq_used_cons_idx++; 171 vq_ring_free_chain(vq, desc_idx); 172 173 if (dxp->cookie != NULL) { 174 rte_pktmbuf_free(dxp->cookie); 175 dxp->cookie = NULL; 176 } 177 } 178 } 179 180 181 static inline int 182 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) 183 { 184 struct vq_desc_extra *dxp; 185 struct virtio_hw *hw = vq->hw; 186 struct vring_desc *start_dp; 187 uint16_t needed = 1; 188 uint16_t head_idx, idx; 189 190 if (unlikely(vq->vq_free_cnt == 0)) 191 return -ENOSPC; 192 if (unlikely(vq->vq_free_cnt < needed)) 193 return -EMSGSIZE; 194 195 head_idx = vq->vq_desc_head_idx; 196 if (unlikely(head_idx >= vq->vq_nentries)) 197 return -EFAULT; 198 199 idx = head_idx; 200 dxp = &vq->vq_descx[idx]; 201 dxp->cookie = (void *)cookie; 202 dxp->ndescs = needed; 203 204 start_dp = vq->vq_ring.desc; 205 start_dp[idx].addr = 206 VIRTIO_MBUF_ADDR(cookie, vq) + 207 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; 208 start_dp[idx].len = 209 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; 210 start_dp[idx].flags = VRING_DESC_F_WRITE; 211 idx = start_dp[idx].next; 212 vq->vq_desc_head_idx = idx; 213 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 214 vq->vq_desc_tail_idx = idx; 215 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 216 vq_update_avail_ring(vq, head_idx); 217 218 return 0; 219 } 220 221 /* When doing TSO, the IP length is not included in the pseudo header 222 * checksum of the packet given to the PMD, but for virtio it is 223 * expected. 224 */ 225 static void 226 virtio_tso_fix_cksum(struct rte_mbuf *m) 227 { 228 /* common case: header is not fragmented */ 229 if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + 230 m->l4_len)) { 231 struct ipv4_hdr *iph; 232 struct ipv6_hdr *ip6h; 233 struct tcp_hdr *th; 234 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; 235 uint32_t tmp; 236 237 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); 238 th = RTE_PTR_ADD(iph, m->l3_len); 239 if ((iph->version_ihl >> 4) == 4) { 240 iph->hdr_checksum = 0; 241 iph->hdr_checksum = rte_ipv4_cksum(iph); 242 ip_len = iph->total_length; 243 ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - 244 m->l3_len); 245 } else { 246 ip6h = (struct ipv6_hdr *)iph; 247 ip_paylen = ip6h->payload_len; 248 } 249 250 /* calculate the new phdr checksum not including ip_paylen */ 251 prev_cksum = th->cksum; 252 tmp = prev_cksum; 253 tmp += ip_paylen; 254 tmp = (tmp & 0xffff) + (tmp >> 16); 255 new_cksum = tmp; 256 257 /* replace it in the packet */ 258 th->cksum = new_cksum; 259 } 260 } 261 262 static inline int 263 tx_offload_enabled(struct virtio_hw *hw) 264 { 265 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || 266 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || 267 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); 268 } 269 270 /* avoid write operation when necessary, to lessen cache issues */ 271 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 272 if ((var) != (val)) \ 273 (var) = (val); \ 274 } while (0) 275 276 static inline void 277 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, 278 uint16_t needed, int use_indirect, int can_push) 279 { 280 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; 281 struct vq_desc_extra *dxp; 282 struct virtqueue *vq = txvq->vq; 283 struct vring_desc *start_dp; 284 uint16_t seg_num = cookie->nb_segs; 285 uint16_t head_idx, idx; 286 uint16_t head_size = vq->hw->vtnet_hdr_size; 287 struct virtio_net_hdr *hdr; 288 int offload; 289 290 offload = tx_offload_enabled(vq->hw); 291 head_idx = vq->vq_desc_head_idx; 292 idx = head_idx; 293 dxp = &vq->vq_descx[idx]; 294 dxp->cookie = (void *)cookie; 295 dxp->ndescs = needed; 296 297 start_dp = vq->vq_ring.desc; 298 299 if (can_push) { 300 /* prepend cannot fail, checked by caller */ 301 hdr = (struct virtio_net_hdr *) 302 rte_pktmbuf_prepend(cookie, head_size); 303 /* if offload disabled, it is not zeroed below, do it now */ 304 if (offload == 0) { 305 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); 306 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); 307 ASSIGN_UNLESS_EQUAL(hdr->flags, 0); 308 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); 309 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); 310 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); 311 } 312 } else if (use_indirect) { 313 /* setup tx ring slot to point to indirect 314 * descriptor list stored in reserved region. 315 * 316 * the first slot in indirect ring is already preset 317 * to point to the header in reserved region 318 */ 319 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 320 RTE_PTR_DIFF(&txr[idx].tx_indir, txr); 321 start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); 322 start_dp[idx].flags = VRING_DESC_F_INDIRECT; 323 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 324 325 /* loop below will fill in rest of the indirect elements */ 326 start_dp = txr[idx].tx_indir; 327 idx = 1; 328 } else { 329 /* setup first tx ring slot to point to header 330 * stored in reserved region. 331 */ 332 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 333 RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); 334 start_dp[idx].len = vq->hw->vtnet_hdr_size; 335 start_dp[idx].flags = VRING_DESC_F_NEXT; 336 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 337 338 idx = start_dp[idx].next; 339 } 340 341 /* Checksum Offload / TSO */ 342 if (offload) { 343 if (cookie->ol_flags & PKT_TX_TCP_SEG) 344 cookie->ol_flags |= PKT_TX_TCP_CKSUM; 345 346 switch (cookie->ol_flags & PKT_TX_L4_MASK) { 347 case PKT_TX_UDP_CKSUM: 348 hdr->csum_start = cookie->l2_len + cookie->l3_len; 349 hdr->csum_offset = offsetof(struct udp_hdr, 350 dgram_cksum); 351 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 352 break; 353 354 case PKT_TX_TCP_CKSUM: 355 hdr->csum_start = cookie->l2_len + cookie->l3_len; 356 hdr->csum_offset = offsetof(struct tcp_hdr, cksum); 357 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 358 break; 359 360 default: 361 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); 362 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); 363 ASSIGN_UNLESS_EQUAL(hdr->flags, 0); 364 break; 365 } 366 367 /* TCP Segmentation Offload */ 368 if (cookie->ol_flags & PKT_TX_TCP_SEG) { 369 virtio_tso_fix_cksum(cookie); 370 hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? 371 VIRTIO_NET_HDR_GSO_TCPV6 : 372 VIRTIO_NET_HDR_GSO_TCPV4; 373 hdr->gso_size = cookie->tso_segsz; 374 hdr->hdr_len = 375 cookie->l2_len + 376 cookie->l3_len + 377 cookie->l4_len; 378 } else { 379 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); 380 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); 381 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); 382 } 383 } 384 385 do { 386 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); 387 start_dp[idx].len = cookie->data_len; 388 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; 389 idx = start_dp[idx].next; 390 } while ((cookie = cookie->next) != NULL); 391 392 if (use_indirect) 393 idx = vq->vq_ring.desc[head_idx].next; 394 395 vq->vq_desc_head_idx = idx; 396 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 397 vq->vq_desc_tail_idx = idx; 398 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 399 vq_update_avail_ring(vq, head_idx); 400 } 401 402 void 403 virtio_dev_cq_start(struct rte_eth_dev *dev) 404 { 405 struct virtio_hw *hw = dev->data->dev_private; 406 407 if (hw->cvq && hw->cvq->vq) { 408 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); 409 } 410 } 411 412 int 413 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, 414 uint16_t queue_idx, 415 uint16_t nb_desc, 416 unsigned int socket_id __rte_unused, 417 __rte_unused const struct rte_eth_rxconf *rx_conf, 418 struct rte_mempool *mp) 419 { 420 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; 421 struct virtio_hw *hw = dev->data->dev_private; 422 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 423 struct virtnet_rx *rxvq; 424 int error, nbufs; 425 struct rte_mbuf *m; 426 uint16_t desc_idx; 427 428 PMD_INIT_FUNC_TRACE(); 429 430 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 431 nb_desc = vq->vq_nentries; 432 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 433 434 rxvq = &vq->rxq; 435 rxvq->queue_id = queue_idx; 436 rxvq->mpool = mp; 437 if (rxvq->mpool == NULL) { 438 rte_exit(EXIT_FAILURE, 439 "Cannot allocate mbufs for rx virtqueue"); 440 } 441 dev->data->rx_queues[queue_idx] = rxvq; 442 443 444 /* Allocate blank mbufs for the each rx descriptor */ 445 nbufs = 0; 446 error = ENOSPC; 447 448 if (hw->use_simple_rxtx) { 449 for (desc_idx = 0; desc_idx < vq->vq_nentries; 450 desc_idx++) { 451 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 452 vq->vq_ring.desc[desc_idx].flags = 453 VRING_DESC_F_WRITE; 454 } 455 } 456 457 memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); 458 for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; 459 desc_idx++) { 460 vq->sw_ring[vq->vq_nentries + desc_idx] = 461 &rxvq->fake_mbuf; 462 } 463 464 while (!virtqueue_full(vq)) { 465 m = rte_mbuf_raw_alloc(rxvq->mpool); 466 if (m == NULL) 467 break; 468 469 /* Enqueue allocated buffers */ 470 if (hw->use_simple_rxtx) 471 error = virtqueue_enqueue_recv_refill_simple(vq, m); 472 else 473 error = virtqueue_enqueue_recv_refill(vq, m); 474 475 if (error) { 476 rte_pktmbuf_free(m); 477 break; 478 } 479 nbufs++; 480 } 481 482 vq_update_avail_idx(vq); 483 484 PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); 485 486 virtio_rxq_vec_setup(rxvq); 487 488 VIRTQUEUE_DUMP(vq); 489 490 return 0; 491 } 492 493 static void 494 virtio_update_rxtx_handler(struct rte_eth_dev *dev, 495 const struct rte_eth_txconf *tx_conf) 496 { 497 uint8_t use_simple_rxtx = 0; 498 struct virtio_hw *hw = dev->data->dev_private; 499 500 #if defined RTE_ARCH_X86 501 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3)) 502 use_simple_rxtx = 1; 503 #elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM 504 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) 505 use_simple_rxtx = 1; 506 #endif 507 /* Use simple rx/tx func if single segment and no offloads */ 508 if (use_simple_rxtx && 509 (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && 510 !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { 511 PMD_INIT_LOG(INFO, "Using simple rx/tx path"); 512 dev->tx_pkt_burst = virtio_xmit_pkts_simple; 513 dev->rx_pkt_burst = virtio_recv_pkts_vec; 514 hw->use_simple_rxtx = use_simple_rxtx; 515 } 516 } 517 518 /* 519 * struct rte_eth_dev *dev: Used to update dev 520 * uint16_t nb_desc: Defaults to values read from config space 521 * unsigned int socket_id: Used to allocate memzone 522 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine 523 * uint16_t queue_idx: Just used as an index in dev txq list 524 */ 525 int 526 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, 527 uint16_t queue_idx, 528 uint16_t nb_desc, 529 unsigned int socket_id __rte_unused, 530 const struct rte_eth_txconf *tx_conf) 531 { 532 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; 533 struct virtio_hw *hw = dev->data->dev_private; 534 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 535 struct virtnet_tx *txvq; 536 uint16_t tx_free_thresh; 537 uint16_t desc_idx; 538 539 PMD_INIT_FUNC_TRACE(); 540 541 virtio_update_rxtx_handler(dev, tx_conf); 542 543 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 544 nb_desc = vq->vq_nentries; 545 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 546 547 txvq = &vq->txq; 548 txvq->queue_id = queue_idx; 549 550 tx_free_thresh = tx_conf->tx_free_thresh; 551 if (tx_free_thresh == 0) 552 tx_free_thresh = 553 RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); 554 555 if (tx_free_thresh >= (vq->vq_nentries - 3)) { 556 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " 557 "number of TX entries minus 3 (%u)." 558 " (tx_free_thresh=%u port=%u queue=%u)\n", 559 vq->vq_nentries - 3, 560 tx_free_thresh, dev->data->port_id, queue_idx); 561 return -EINVAL; 562 } 563 564 vq->vq_free_thresh = tx_free_thresh; 565 566 if (hw->use_simple_rxtx) { 567 uint16_t mid_idx = vq->vq_nentries >> 1; 568 569 for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { 570 vq->vq_ring.avail->ring[desc_idx] = 571 desc_idx + mid_idx; 572 vq->vq_ring.desc[desc_idx + mid_idx].next = 573 desc_idx; 574 vq->vq_ring.desc[desc_idx + mid_idx].addr = 575 txvq->virtio_net_hdr_mem + 576 offsetof(struct virtio_tx_region, tx_hdr); 577 vq->vq_ring.desc[desc_idx + mid_idx].len = 578 vq->hw->vtnet_hdr_size; 579 vq->vq_ring.desc[desc_idx + mid_idx].flags = 580 VRING_DESC_F_NEXT; 581 vq->vq_ring.desc[desc_idx].flags = 0; 582 } 583 for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; 584 desc_idx++) 585 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 586 } 587 588 VIRTQUEUE_DUMP(vq); 589 590 dev->data->tx_queues[queue_idx] = txvq; 591 return 0; 592 } 593 594 static void 595 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) 596 { 597 int error; 598 /* 599 * Requeue the discarded mbuf. This should always be 600 * successful since it was just dequeued. 601 */ 602 error = virtqueue_enqueue_recv_refill(vq, m); 603 if (unlikely(error)) { 604 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); 605 rte_pktmbuf_free(m); 606 } 607 } 608 609 static void 610 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) 611 { 612 uint32_t s = mbuf->pkt_len; 613 struct ether_addr *ea; 614 615 if (s == 64) { 616 stats->size_bins[1]++; 617 } else if (s > 64 && s < 1024) { 618 uint32_t bin; 619 620 /* count zeros, and offset into correct bin */ 621 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; 622 stats->size_bins[bin]++; 623 } else { 624 if (s < 64) 625 stats->size_bins[0]++; 626 else if (s < 1519) 627 stats->size_bins[6]++; 628 else if (s >= 1519) 629 stats->size_bins[7]++; 630 } 631 632 ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); 633 if (is_multicast_ether_addr(ea)) { 634 if (is_broadcast_ether_addr(ea)) 635 stats->broadcast++; 636 else 637 stats->multicast++; 638 } 639 } 640 641 /* Optionally fill offload information in structure */ 642 static int 643 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) 644 { 645 struct rte_net_hdr_lens hdr_lens; 646 uint32_t hdrlen, ptype; 647 int l4_supported = 0; 648 649 /* nothing to do */ 650 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 651 return 0; 652 653 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 654 655 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 656 m->packet_type = ptype; 657 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 658 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 659 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 660 l4_supported = 1; 661 662 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 663 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 664 if (hdr->csum_start <= hdrlen && l4_supported) { 665 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 666 } else { 667 /* Unknown proto or tunnel, do sw cksum. We can assume 668 * the cksum field is in the first segment since the 669 * buffers we provided to the host are large enough. 670 * In case of SCTP, this will be wrong since it's a CRC 671 * but there's nothing we can do. 672 */ 673 uint16_t csum, off; 674 675 rte_raw_cksum_mbuf(m, hdr->csum_start, 676 rte_pktmbuf_pkt_len(m) - hdr->csum_start, 677 &csum); 678 if (likely(csum != 0xffff)) 679 csum = ~csum; 680 off = hdr->csum_offset + hdr->csum_start; 681 if (rte_pktmbuf_data_len(m) >= off + 1) 682 *rte_pktmbuf_mtod_offset(m, uint16_t *, 683 off) = csum; 684 } 685 } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { 686 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; 687 } 688 689 /* GSO request, save required information in mbuf */ 690 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 691 /* Check unsupported modes */ 692 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || 693 (hdr->gso_size == 0)) { 694 return -EINVAL; 695 } 696 697 /* Update mss lengthes in mbuf */ 698 m->tso_segsz = hdr->gso_size; 699 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 700 case VIRTIO_NET_HDR_GSO_TCPV4: 701 case VIRTIO_NET_HDR_GSO_TCPV6: 702 m->ol_flags |= PKT_RX_LRO | \ 703 PKT_RX_L4_CKSUM_NONE; 704 break; 705 default: 706 return -EINVAL; 707 } 708 } 709 710 return 0; 711 } 712 713 static inline int 714 rx_offload_enabled(struct virtio_hw *hw) 715 { 716 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || 717 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 718 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); 719 } 720 721 #define VIRTIO_MBUF_BURST_SZ 64 722 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) 723 uint16_t 724 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 725 { 726 struct virtnet_rx *rxvq = rx_queue; 727 struct virtqueue *vq = rxvq->vq; 728 struct virtio_hw *hw; 729 struct rte_mbuf *rxm, *new_mbuf; 730 uint16_t nb_used, num, nb_rx; 731 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 732 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 733 int error; 734 uint32_t i, nb_enqueued; 735 uint32_t hdr_size; 736 int offload; 737 struct virtio_net_hdr *hdr; 738 739 nb_used = VIRTQUEUE_NUSED(vq); 740 741 virtio_rmb(); 742 743 num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); 744 num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); 745 if (likely(num > DESC_PER_CACHELINE)) 746 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); 747 748 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); 749 PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); 750 751 hw = vq->hw; 752 nb_rx = 0; 753 nb_enqueued = 0; 754 hdr_size = hw->vtnet_hdr_size; 755 offload = rx_offload_enabled(hw); 756 757 for (i = 0; i < num ; i++) { 758 rxm = rcv_pkts[i]; 759 760 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]); 761 762 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { 763 PMD_RX_LOG(ERR, "Packet drop"); 764 nb_enqueued++; 765 virtio_discard_rxbuf(vq, rxm); 766 rxvq->stats.errors++; 767 continue; 768 } 769 770 rxm->port = rxvq->port_id; 771 rxm->data_off = RTE_PKTMBUF_HEADROOM; 772 rxm->ol_flags = 0; 773 rxm->vlan_tci = 0; 774 775 rxm->nb_segs = 1; 776 rxm->next = NULL; 777 rxm->pkt_len = (uint32_t)(len[i] - hdr_size); 778 rxm->data_len = (uint16_t)(len[i] - hdr_size); 779 780 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr + 781 RTE_PKTMBUF_HEADROOM - hdr_size); 782 783 if (hw->vlan_strip) 784 rte_vlan_strip(rxm); 785 786 if (offload && virtio_rx_offload(rxm, hdr) < 0) { 787 virtio_discard_rxbuf(vq, rxm); 788 rxvq->stats.errors++; 789 continue; 790 } 791 792 VIRTIO_DUMP_PACKET(rxm, rxm->data_len); 793 794 rx_pkts[nb_rx++] = rxm; 795 796 rxvq->stats.bytes += rxm->pkt_len; 797 virtio_update_packet_stats(&rxvq->stats, rxm); 798 } 799 800 rxvq->stats.packets += nb_rx; 801 802 /* Allocate new mbuf for the used descriptor */ 803 error = ENOSPC; 804 while (likely(!virtqueue_full(vq))) { 805 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 806 if (unlikely(new_mbuf == NULL)) { 807 struct rte_eth_dev *dev 808 = &rte_eth_devices[rxvq->port_id]; 809 dev->data->rx_mbuf_alloc_failed++; 810 break; 811 } 812 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 813 if (unlikely(error)) { 814 rte_pktmbuf_free(new_mbuf); 815 break; 816 } 817 nb_enqueued++; 818 } 819 820 if (likely(nb_enqueued)) { 821 vq_update_avail_idx(vq); 822 823 if (unlikely(virtqueue_kick_prepare(vq))) { 824 virtqueue_notify(vq); 825 PMD_RX_LOG(DEBUG, "Notified"); 826 } 827 } 828 829 return nb_rx; 830 } 831 832 uint16_t 833 virtio_recv_mergeable_pkts(void *rx_queue, 834 struct rte_mbuf **rx_pkts, 835 uint16_t nb_pkts) 836 { 837 struct virtnet_rx *rxvq = rx_queue; 838 struct virtqueue *vq = rxvq->vq; 839 struct virtio_hw *hw; 840 struct rte_mbuf *rxm, *new_mbuf; 841 uint16_t nb_used, num, nb_rx; 842 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 843 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 844 struct rte_mbuf *prev; 845 int error; 846 uint32_t i, nb_enqueued; 847 uint32_t seg_num; 848 uint16_t extra_idx; 849 uint32_t seg_res; 850 uint32_t hdr_size; 851 int offload; 852 853 nb_used = VIRTQUEUE_NUSED(vq); 854 855 virtio_rmb(); 856 857 PMD_RX_LOG(DEBUG, "used:%d", nb_used); 858 859 hw = vq->hw; 860 nb_rx = 0; 861 i = 0; 862 nb_enqueued = 0; 863 seg_num = 0; 864 extra_idx = 0; 865 seg_res = 0; 866 hdr_size = hw->vtnet_hdr_size; 867 offload = rx_offload_enabled(hw); 868 869 while (i < nb_used) { 870 struct virtio_net_hdr_mrg_rxbuf *header; 871 872 if (nb_rx == nb_pkts) 873 break; 874 875 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); 876 if (num != 1) 877 continue; 878 879 i++; 880 881 PMD_RX_LOG(DEBUG, "dequeue:%d", num); 882 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); 883 884 rxm = rcv_pkts[0]; 885 886 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { 887 PMD_RX_LOG(ERR, "Packet drop"); 888 nb_enqueued++; 889 virtio_discard_rxbuf(vq, rxm); 890 rxvq->stats.errors++; 891 continue; 892 } 893 894 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr + 895 RTE_PKTMBUF_HEADROOM - hdr_size); 896 seg_num = header->num_buffers; 897 898 if (seg_num == 0) 899 seg_num = 1; 900 901 rxm->data_off = RTE_PKTMBUF_HEADROOM; 902 rxm->nb_segs = seg_num; 903 rxm->next = NULL; 904 rxm->ol_flags = 0; 905 rxm->vlan_tci = 0; 906 rxm->pkt_len = (uint32_t)(len[0] - hdr_size); 907 rxm->data_len = (uint16_t)(len[0] - hdr_size); 908 909 rxm->port = rxvq->port_id; 910 rx_pkts[nb_rx] = rxm; 911 prev = rxm; 912 913 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { 914 virtio_discard_rxbuf(vq, rxm); 915 rxvq->stats.errors++; 916 continue; 917 } 918 919 seg_res = seg_num - 1; 920 921 while (seg_res != 0) { 922 /* 923 * Get extra segments for current uncompleted packet. 924 */ 925 uint16_t rcv_cnt = 926 RTE_MIN(seg_res, RTE_DIM(rcv_pkts)); 927 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { 928 uint32_t rx_num = 929 virtqueue_dequeue_burst_rx(vq, 930 rcv_pkts, len, rcv_cnt); 931 i += rx_num; 932 rcv_cnt = rx_num; 933 } else { 934 PMD_RX_LOG(ERR, 935 "No enough segments for packet."); 936 nb_enqueued++; 937 virtio_discard_rxbuf(vq, rxm); 938 rxvq->stats.errors++; 939 break; 940 } 941 942 extra_idx = 0; 943 944 while (extra_idx < rcv_cnt) { 945 rxm = rcv_pkts[extra_idx]; 946 947 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; 948 rxm->next = NULL; 949 rxm->pkt_len = (uint32_t)(len[extra_idx]); 950 rxm->data_len = (uint16_t)(len[extra_idx]); 951 952 if (prev) 953 prev->next = rxm; 954 955 prev = rxm; 956 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len; 957 extra_idx++; 958 }; 959 seg_res -= rcv_cnt; 960 } 961 962 if (hw->vlan_strip) 963 rte_vlan_strip(rx_pkts[nb_rx]); 964 965 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], 966 rx_pkts[nb_rx]->data_len); 967 968 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; 969 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); 970 nb_rx++; 971 } 972 973 rxvq->stats.packets += nb_rx; 974 975 /* Allocate new mbuf for the used descriptor */ 976 error = ENOSPC; 977 while (likely(!virtqueue_full(vq))) { 978 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 979 if (unlikely(new_mbuf == NULL)) { 980 struct rte_eth_dev *dev 981 = &rte_eth_devices[rxvq->port_id]; 982 dev->data->rx_mbuf_alloc_failed++; 983 break; 984 } 985 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 986 if (unlikely(error)) { 987 rte_pktmbuf_free(new_mbuf); 988 break; 989 } 990 nb_enqueued++; 991 } 992 993 if (likely(nb_enqueued)) { 994 vq_update_avail_idx(vq); 995 996 if (unlikely(virtqueue_kick_prepare(vq))) { 997 virtqueue_notify(vq); 998 PMD_RX_LOG(DEBUG, "Notified"); 999 } 1000 } 1001 1002 return nb_rx; 1003 } 1004 1005 uint16_t 1006 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1007 { 1008 struct virtnet_tx *txvq = tx_queue; 1009 struct virtqueue *vq = txvq->vq; 1010 struct virtio_hw *hw = vq->hw; 1011 uint16_t hdr_size = hw->vtnet_hdr_size; 1012 uint16_t nb_used, nb_tx; 1013 int error; 1014 1015 if (unlikely(nb_pkts < 1)) 1016 return nb_pkts; 1017 1018 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); 1019 nb_used = VIRTQUEUE_NUSED(vq); 1020 1021 virtio_rmb(); 1022 if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) 1023 virtio_xmit_cleanup(vq, nb_used); 1024 1025 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { 1026 struct rte_mbuf *txm = tx_pkts[nb_tx]; 1027 int can_push = 0, use_indirect = 0, slots, need; 1028 1029 /* Do VLAN tag insertion */ 1030 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { 1031 error = rte_vlan_insert(&txm); 1032 if (unlikely(error)) { 1033 rte_pktmbuf_free(txm); 1034 continue; 1035 } 1036 } 1037 1038 /* optimize ring usage */ 1039 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || 1040 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && 1041 rte_mbuf_refcnt_read(txm) == 1 && 1042 RTE_MBUF_DIRECT(txm) && 1043 txm->nb_segs == 1 && 1044 rte_pktmbuf_headroom(txm) >= hdr_size && 1045 rte_is_aligned(rte_pktmbuf_mtod(txm, char *), 1046 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) 1047 can_push = 1; 1048 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) && 1049 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT) 1050 use_indirect = 1; 1051 1052 /* How many main ring entries are needed to this Tx? 1053 * any_layout => number of segments 1054 * indirect => 1 1055 * default => number of segments + 1 1056 */ 1057 slots = use_indirect ? 1 : (txm->nb_segs + !can_push); 1058 need = slots - vq->vq_free_cnt; 1059 1060 /* Positive value indicates it need free vring descriptors */ 1061 if (unlikely(need > 0)) { 1062 nb_used = VIRTQUEUE_NUSED(vq); 1063 virtio_rmb(); 1064 need = RTE_MIN(need, (int)nb_used); 1065 1066 virtio_xmit_cleanup(vq, need); 1067 need = slots - vq->vq_free_cnt; 1068 if (unlikely(need > 0)) { 1069 PMD_TX_LOG(ERR, 1070 "No free tx descriptors to transmit"); 1071 break; 1072 } 1073 } 1074 1075 /* Enqueue Packet buffers */ 1076 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); 1077 1078 txvq->stats.bytes += txm->pkt_len; 1079 virtio_update_packet_stats(&txvq->stats, txm); 1080 } 1081 1082 txvq->stats.packets += nb_tx; 1083 1084 if (likely(nb_tx)) { 1085 vq_update_avail_idx(vq); 1086 1087 if (unlikely(virtqueue_kick_prepare(vq))) { 1088 virtqueue_notify(vq); 1089 PMD_TX_LOG(DEBUG, "Notified backend after xmit"); 1090 } 1091 } 1092 1093 return nb_tx; 1094 } 1095