/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

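/*
 * Dequeue up to @num used RX buffers. For each used ring entry the
 * mbuf posted for that descriptor is looked up, its length recorded,
 * the descriptor chain returned to the free list and the mbuf stored
 * in @rx_pkts. Returns the number of mbufs dequeued, which may be
 * less than @num if a descriptor carries no mbuf cookie.
 */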
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t)uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t)uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

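/*
 * Post a single mbuf as a receive buffer. The descriptor covers the
 * virtio-net header (placed in the mbuf headroom) plus the rest of
 * the data room and is marked device-writable. Returns 0 on success
 * or a negative errno when no free descriptor is available.
 */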
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* the phdr checksum from the stack does not include
		 * ip_paylen, so add it in
		 */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid write operation when necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

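/*
 * Enqueue one packet (an mbuf chain) for transmission. Depending on
 * what the caller determined, the virtio-net header is either pushed
 * into the mbuf headroom (can_push), placed in the reserved region
 * and referenced through an indirect descriptor table (use_indirect),
 * or linked as a separate first descriptor. When TX offloads are
 * enabled, the checksum and TSO fields of the header are filled from
 * the mbuf offload flags.
 */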
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() counts the hdr size in the pkt
		 * length, which is wrong. The subtraction below restores
		 * the correct pkt size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		rte_spinlock_init(&hw->cvq->lock);
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_rxconf *rx_conf __rte_unused,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}

	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

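/*
 * Fill the RX virtqueue with receive buffers after queue setup. When
 * the simple (vectorized) RX path is in use, the avail ring and
 * descriptor flags are pre-initialized and buffers are rearmed in
 * batches; otherwise mbufs from the queue mempool are enqueued one by
 * one until the ring is full.
 */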
int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}

		virtio_rxq_vec_setup(rxvq);
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	if (hw->use_simple_rx) {
		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
			virtio_rxq_rearm_vec(rxvq);
			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
		}
	} else {
		while (!virtqueue_full(vq)) {
			m = rte_mbuf_raw_alloc(rxvq->mpool);
			if (m == NULL)
				break;

			/* Enqueue allocated buffers */
			error = virtqueue_enqueue_recv_refill(vq, m);
			if (error) {
				rte_pktmbuf_free(m);
				break;
			}
			nbufs++;
		}

		vq_update_avail_idx(vq);
	}

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	VIRTQUEUE_DUMP(vq);

	return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if (dev->data->dev_conf.txmode.offloads)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;
	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

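/*
 * Update per-queue packet statistics for one mbuf. Packet sizes are
 * accounted in bins: bin 0 for runts (< 64 bytes), bin 1 for exactly
 * 64, bins 2-5 for 65-127, 128-255, 256-511 and 512-1023 bytes,
 * bin 6 for 1024-1518 and bin 7 for anything larger. Multicast and
 * broadcast destinations are counted separately.
 */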
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))

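/*
 * RX burst function for the non-mergeable path, where each packet fits
 * in a single descriptor. Used buffers are dequeued, the virtio-net
 * header is stripped, VLAN stripping and RX offload flags are applied
 * when enabled, and the ring is refilled with fresh mbufs before the
 * device is notified (if it asks for a kick).
 */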
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

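/*
 * RX burst function for the mergeable-buffer path, where one packet
 * may span several descriptors. The header of the first buffer carries
 * num_buffers; the extra segments are dequeued and chained onto the
 * first mbuf before the packet is returned. Refill and notification
 * are handled as in virtio_recv_pkts().
 */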
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
						rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"No enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

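/*
 * TX burst function. Completed descriptors are reclaimed when the used
 * count crosses the free threshold, then each packet is enqueued using
 * the cheapest layout available: header pushed into the mbuf headroom,
 * an indirect descriptor table, or a separate header descriptor. The
 * device is notified at the end of the burst if it requests a kick.
 */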
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect => 1
		 * default => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* Positive value indicates that free vring descriptors are needed */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}