1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <string.h> 9 #include <errno.h> 10 11 #include <rte_cycles.h> 12 #include <rte_memory.h> 13 #include <rte_branch_prediction.h> 14 #include <rte_mempool.h> 15 #include <rte_malloc.h> 16 #include <rte_mbuf.h> 17 #include <rte_ether.h> 18 #include <rte_ethdev_driver.h> 19 #include <rte_prefetch.h> 20 #include <rte_string_fns.h> 21 #include <rte_errno.h> 22 #include <rte_byteorder.h> 23 #include <rte_net.h> 24 #include <rte_ip.h> 25 #include <rte_udp.h> 26 #include <rte_tcp.h> 27 28 #include "virtio_logs.h" 29 #include "virtio_ethdev.h" 30 #include "virtio_pci.h" 31 #include "virtqueue.h" 32 #include "virtio_rxtx.h" 33 34 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 35 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len) 36 #else 37 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0) 38 #endif 39 40 41 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ 42 ETH_TXQ_FLAGS_NOOFFLOADS) 43 44 int 45 virtio_dev_rx_queue_done(void *rxq, uint16_t offset) 46 { 47 struct virtnet_rx *rxvq = rxq; 48 struct virtqueue *vq = rxvq->vq; 49 50 return VIRTQUEUE_NUSED(vq) >= offset; 51 } 52 53 void 54 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) 55 { 56 struct vring_desc *dp, *dp_tail; 57 struct vq_desc_extra *dxp; 58 uint16_t desc_idx_last = desc_idx; 59 60 dp = &vq->vq_ring.desc[desc_idx]; 61 dxp = &vq->vq_descx[desc_idx]; 62 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); 63 if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { 64 while (dp->flags & VRING_DESC_F_NEXT) { 65 desc_idx_last = dp->next; 66 dp = &vq->vq_ring.desc[dp->next]; 67 } 68 } 69 dxp->ndescs = 0; 70 71 /* 72 * We must append the existing free chain, if any, to the end of 73 * newly freed chain. If the virtqueue was completely used, then 74 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 75 */ 76 if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { 77 vq->vq_desc_head_idx = desc_idx; 78 } else { 79 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; 80 dp_tail->next = desc_idx; 81 } 82 83 vq->vq_desc_tail_idx = desc_idx_last; 84 dp->next = VQ_RING_DESC_CHAIN_END; 85 } 86 87 static uint16_t 88 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, 89 uint32_t *len, uint16_t num) 90 { 91 struct vring_used_elem *uep; 92 struct rte_mbuf *cookie; 93 uint16_t used_idx, desc_idx; 94 uint16_t i; 95 96 /* Caller does the check */ 97 for (i = 0; i < num ; i++) { 98 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 99 uep = &vq->vq_ring.used->ring[used_idx]; 100 desc_idx = (uint16_t) uep->id; 101 len[i] = uep->len; 102 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; 103 104 if (unlikely(cookie == NULL)) { 105 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", 106 vq->vq_used_cons_idx); 107 break; 108 } 109 110 rte_prefetch0(cookie); 111 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); 112 rx_pkts[i] = cookie; 113 vq->vq_used_cons_idx++; 114 vq_ring_free_chain(vq, desc_idx); 115 vq->vq_descx[desc_idx].cookie = NULL; 116 } 117 118 return i; 119 } 120 121 #ifndef DEFAULT_TX_FREE_THRESH 122 #define DEFAULT_TX_FREE_THRESH 32 123 #endif 124 125 /* Cleanup from completed transmits. */ 126 static void 127 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) 128 { 129 uint16_t i, used_idx, desc_idx; 130 for (i = 0; i < num; i++) { 131 struct vring_used_elem *uep; 132 struct vq_desc_extra *dxp; 133 134 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); 135 uep = &vq->vq_ring.used->ring[used_idx]; 136 137 desc_idx = (uint16_t) uep->id; 138 dxp = &vq->vq_descx[desc_idx]; 139 vq->vq_used_cons_idx++; 140 vq_ring_free_chain(vq, desc_idx); 141 142 if (dxp->cookie != NULL) { 143 rte_pktmbuf_free(dxp->cookie); 144 dxp->cookie = NULL; 145 } 146 } 147 } 148 149 150 static inline int 151 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) 152 { 153 struct vq_desc_extra *dxp; 154 struct virtio_hw *hw = vq->hw; 155 struct vring_desc *start_dp; 156 uint16_t needed = 1; 157 uint16_t head_idx, idx; 158 159 if (unlikely(vq->vq_free_cnt == 0)) 160 return -ENOSPC; 161 if (unlikely(vq->vq_free_cnt < needed)) 162 return -EMSGSIZE; 163 164 head_idx = vq->vq_desc_head_idx; 165 if (unlikely(head_idx >= vq->vq_nentries)) 166 return -EFAULT; 167 168 idx = head_idx; 169 dxp = &vq->vq_descx[idx]; 170 dxp->cookie = (void *)cookie; 171 dxp->ndescs = needed; 172 173 start_dp = vq->vq_ring.desc; 174 start_dp[idx].addr = 175 VIRTIO_MBUF_ADDR(cookie, vq) + 176 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; 177 start_dp[idx].len = 178 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; 179 start_dp[idx].flags = VRING_DESC_F_WRITE; 180 idx = start_dp[idx].next; 181 vq->vq_desc_head_idx = idx; 182 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 183 vq->vq_desc_tail_idx = idx; 184 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 185 vq_update_avail_ring(vq, head_idx); 186 187 return 0; 188 } 189 190 /* When doing TSO, the IP length is not included in the pseudo header 191 * checksum of the packet given to the PMD, but for virtio it is 192 * expected. 193 */ 194 static void 195 virtio_tso_fix_cksum(struct rte_mbuf *m) 196 { 197 /* common case: header is not fragmented */ 198 if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + 199 m->l4_len)) { 200 struct ipv4_hdr *iph; 201 struct ipv6_hdr *ip6h; 202 struct tcp_hdr *th; 203 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; 204 uint32_t tmp; 205 206 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); 207 th = RTE_PTR_ADD(iph, m->l3_len); 208 if ((iph->version_ihl >> 4) == 4) { 209 iph->hdr_checksum = 0; 210 iph->hdr_checksum = rte_ipv4_cksum(iph); 211 ip_len = iph->total_length; 212 ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - 213 m->l3_len); 214 } else { 215 ip6h = (struct ipv6_hdr *)iph; 216 ip_paylen = ip6h->payload_len; 217 } 218 219 /* calculate the new phdr checksum not including ip_paylen */ 220 prev_cksum = th->cksum; 221 tmp = prev_cksum; 222 tmp += ip_paylen; 223 tmp = (tmp & 0xffff) + (tmp >> 16); 224 new_cksum = tmp; 225 226 /* replace it in the packet */ 227 th->cksum = new_cksum; 228 } 229 } 230 231 static inline int 232 tx_offload_enabled(struct virtio_hw *hw) 233 { 234 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || 235 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || 236 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); 237 } 238 239 /* avoid write operation when necessary, to lessen cache issues */ 240 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 241 if ((var) != (val)) \ 242 (var) = (val); \ 243 } while (0) 244 245 static inline void 246 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, 247 uint16_t needed, int use_indirect, int can_push) 248 { 249 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; 250 struct vq_desc_extra *dxp; 251 struct virtqueue *vq = txvq->vq; 252 struct vring_desc *start_dp; 253 uint16_t seg_num = cookie->nb_segs; 254 uint16_t head_idx, idx; 255 uint16_t head_size = vq->hw->vtnet_hdr_size; 256 struct virtio_net_hdr *hdr; 257 int offload; 258 259 offload = tx_offload_enabled(vq->hw); 260 head_idx = vq->vq_desc_head_idx; 261 idx = head_idx; 262 dxp = &vq->vq_descx[idx]; 263 dxp->cookie = (void *)cookie; 264 dxp->ndescs = needed; 265 266 start_dp = vq->vq_ring.desc; 267 268 if (can_push) { 269 /* prepend cannot fail, checked by caller */ 270 hdr = (struct virtio_net_hdr *) 271 rte_pktmbuf_prepend(cookie, head_size); 272 /* rte_pktmbuf_prepend() counts the hdr size to the pkt length, 273 * which is wrong. Below subtract restores correct pkt size. 274 */ 275 cookie->pkt_len -= head_size; 276 /* if offload disabled, it is not zeroed below, do it now */ 277 if (offload == 0) { 278 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); 279 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); 280 ASSIGN_UNLESS_EQUAL(hdr->flags, 0); 281 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); 282 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); 283 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); 284 } 285 } else if (use_indirect) { 286 /* setup tx ring slot to point to indirect 287 * descriptor list stored in reserved region. 288 * 289 * the first slot in indirect ring is already preset 290 * to point to the header in reserved region 291 */ 292 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 293 RTE_PTR_DIFF(&txr[idx].tx_indir, txr); 294 start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); 295 start_dp[idx].flags = VRING_DESC_F_INDIRECT; 296 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 297 298 /* loop below will fill in rest of the indirect elements */ 299 start_dp = txr[idx].tx_indir; 300 idx = 1; 301 } else { 302 /* setup first tx ring slot to point to header 303 * stored in reserved region. 304 */ 305 start_dp[idx].addr = txvq->virtio_net_hdr_mem + 306 RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); 307 start_dp[idx].len = vq->hw->vtnet_hdr_size; 308 start_dp[idx].flags = VRING_DESC_F_NEXT; 309 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; 310 311 idx = start_dp[idx].next; 312 } 313 314 /* Checksum Offload / TSO */ 315 if (offload) { 316 if (cookie->ol_flags & PKT_TX_TCP_SEG) 317 cookie->ol_flags |= PKT_TX_TCP_CKSUM; 318 319 switch (cookie->ol_flags & PKT_TX_L4_MASK) { 320 case PKT_TX_UDP_CKSUM: 321 hdr->csum_start = cookie->l2_len + cookie->l3_len; 322 hdr->csum_offset = offsetof(struct udp_hdr, 323 dgram_cksum); 324 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 325 break; 326 327 case PKT_TX_TCP_CKSUM: 328 hdr->csum_start = cookie->l2_len + cookie->l3_len; 329 hdr->csum_offset = offsetof(struct tcp_hdr, cksum); 330 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 331 break; 332 333 default: 334 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); 335 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); 336 ASSIGN_UNLESS_EQUAL(hdr->flags, 0); 337 break; 338 } 339 340 /* TCP Segmentation Offload */ 341 if (cookie->ol_flags & PKT_TX_TCP_SEG) { 342 virtio_tso_fix_cksum(cookie); 343 hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? 344 VIRTIO_NET_HDR_GSO_TCPV6 : 345 VIRTIO_NET_HDR_GSO_TCPV4; 346 hdr->gso_size = cookie->tso_segsz; 347 hdr->hdr_len = 348 cookie->l2_len + 349 cookie->l3_len + 350 cookie->l4_len; 351 } else { 352 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); 353 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); 354 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); 355 } 356 } 357 358 do { 359 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); 360 start_dp[idx].len = cookie->data_len; 361 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; 362 idx = start_dp[idx].next; 363 } while ((cookie = cookie->next) != NULL); 364 365 if (use_indirect) 366 idx = vq->vq_ring.desc[head_idx].next; 367 368 vq->vq_desc_head_idx = idx; 369 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) 370 vq->vq_desc_tail_idx = idx; 371 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); 372 vq_update_avail_ring(vq, head_idx); 373 } 374 375 void 376 virtio_dev_cq_start(struct rte_eth_dev *dev) 377 { 378 struct virtio_hw *hw = dev->data->dev_private; 379 380 if (hw->cvq && hw->cvq->vq) { 381 rte_spinlock_init(&hw->cvq->lock); 382 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); 383 } 384 } 385 386 int 387 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, 388 uint16_t queue_idx, 389 uint16_t nb_desc, 390 unsigned int socket_id __rte_unused, 391 __rte_unused const struct rte_eth_rxconf *rx_conf, 392 struct rte_mempool *mp) 393 { 394 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; 395 struct virtio_hw *hw = dev->data->dev_private; 396 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 397 struct virtnet_rx *rxvq; 398 399 PMD_INIT_FUNC_TRACE(); 400 401 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 402 nb_desc = vq->vq_nentries; 403 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 404 405 rxvq = &vq->rxq; 406 rxvq->queue_id = queue_idx; 407 rxvq->mpool = mp; 408 if (rxvq->mpool == NULL) { 409 rte_exit(EXIT_FAILURE, 410 "Cannot allocate mbufs for rx virtqueue"); 411 } 412 dev->data->rx_queues[queue_idx] = rxvq; 413 414 return 0; 415 } 416 417 int 418 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx) 419 { 420 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; 421 struct virtio_hw *hw = dev->data->dev_private; 422 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 423 struct virtnet_rx *rxvq = &vq->rxq; 424 struct rte_mbuf *m; 425 uint16_t desc_idx; 426 int error, nbufs; 427 428 PMD_INIT_FUNC_TRACE(); 429 430 /* Allocate blank mbufs for the each rx descriptor */ 431 nbufs = 0; 432 433 if (hw->use_simple_rx) { 434 for (desc_idx = 0; desc_idx < vq->vq_nentries; 435 desc_idx++) { 436 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 437 vq->vq_ring.desc[desc_idx].flags = 438 VRING_DESC_F_WRITE; 439 } 440 } 441 442 memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); 443 for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; 444 desc_idx++) { 445 vq->sw_ring[vq->vq_nentries + desc_idx] = 446 &rxvq->fake_mbuf; 447 } 448 449 while (!virtqueue_full(vq)) { 450 m = rte_mbuf_raw_alloc(rxvq->mpool); 451 if (m == NULL) 452 break; 453 454 /* Enqueue allocated buffers */ 455 if (hw->use_simple_rx) 456 error = virtqueue_enqueue_recv_refill_simple(vq, m); 457 else 458 error = virtqueue_enqueue_recv_refill(vq, m); 459 460 if (error) { 461 rte_pktmbuf_free(m); 462 break; 463 } 464 nbufs++; 465 } 466 467 vq_update_avail_idx(vq); 468 469 PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); 470 471 virtio_rxq_vec_setup(rxvq); 472 473 VIRTQUEUE_DUMP(vq); 474 475 return 0; 476 } 477 478 /* 479 * struct rte_eth_dev *dev: Used to update dev 480 * uint16_t nb_desc: Defaults to values read from config space 481 * unsigned int socket_id: Used to allocate memzone 482 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine 483 * uint16_t queue_idx: Just used as an index in dev txq list 484 */ 485 int 486 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, 487 uint16_t queue_idx, 488 uint16_t nb_desc, 489 unsigned int socket_id __rte_unused, 490 const struct rte_eth_txconf *tx_conf) 491 { 492 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; 493 struct virtio_hw *hw = dev->data->dev_private; 494 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 495 struct virtnet_tx *txvq; 496 uint16_t tx_free_thresh; 497 498 PMD_INIT_FUNC_TRACE(); 499 500 /* cannot use simple rxtx funcs with multisegs or offloads */ 501 if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS) 502 hw->use_simple_tx = 0; 503 504 if (nb_desc == 0 || nb_desc > vq->vq_nentries) 505 nb_desc = vq->vq_nentries; 506 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); 507 508 txvq = &vq->txq; 509 txvq->queue_id = queue_idx; 510 511 tx_free_thresh = tx_conf->tx_free_thresh; 512 if (tx_free_thresh == 0) 513 tx_free_thresh = 514 RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); 515 516 if (tx_free_thresh >= (vq->vq_nentries - 3)) { 517 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " 518 "number of TX entries minus 3 (%u)." 519 " (tx_free_thresh=%u port=%u queue=%u)\n", 520 vq->vq_nentries - 3, 521 tx_free_thresh, dev->data->port_id, queue_idx); 522 return -EINVAL; 523 } 524 525 vq->vq_free_thresh = tx_free_thresh; 526 527 dev->data->tx_queues[queue_idx] = txvq; 528 return 0; 529 } 530 531 int 532 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev, 533 uint16_t queue_idx) 534 { 535 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; 536 struct virtio_hw *hw = dev->data->dev_private; 537 struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; 538 uint16_t mid_idx = vq->vq_nentries >> 1; 539 struct virtnet_tx *txvq = &vq->txq; 540 uint16_t desc_idx; 541 542 PMD_INIT_FUNC_TRACE(); 543 544 if (hw->use_simple_tx) { 545 for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { 546 vq->vq_ring.avail->ring[desc_idx] = 547 desc_idx + mid_idx; 548 vq->vq_ring.desc[desc_idx + mid_idx].next = 549 desc_idx; 550 vq->vq_ring.desc[desc_idx + mid_idx].addr = 551 txvq->virtio_net_hdr_mem + 552 offsetof(struct virtio_tx_region, tx_hdr); 553 vq->vq_ring.desc[desc_idx + mid_idx].len = 554 vq->hw->vtnet_hdr_size; 555 vq->vq_ring.desc[desc_idx + mid_idx].flags = 556 VRING_DESC_F_NEXT; 557 vq->vq_ring.desc[desc_idx].flags = 0; 558 } 559 for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; 560 desc_idx++) 561 vq->vq_ring.avail->ring[desc_idx] = desc_idx; 562 } 563 564 VIRTQUEUE_DUMP(vq); 565 566 return 0; 567 } 568 569 static void 570 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) 571 { 572 int error; 573 /* 574 * Requeue the discarded mbuf. This should always be 575 * successful since it was just dequeued. 576 */ 577 error = virtqueue_enqueue_recv_refill(vq, m); 578 if (unlikely(error)) { 579 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); 580 rte_pktmbuf_free(m); 581 } 582 } 583 584 static void 585 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) 586 { 587 uint32_t s = mbuf->pkt_len; 588 struct ether_addr *ea; 589 590 if (s == 64) { 591 stats->size_bins[1]++; 592 } else if (s > 64 && s < 1024) { 593 uint32_t bin; 594 595 /* count zeros, and offset into correct bin */ 596 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; 597 stats->size_bins[bin]++; 598 } else { 599 if (s < 64) 600 stats->size_bins[0]++; 601 else if (s < 1519) 602 stats->size_bins[6]++; 603 else if (s >= 1519) 604 stats->size_bins[7]++; 605 } 606 607 ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); 608 if (is_multicast_ether_addr(ea)) { 609 if (is_broadcast_ether_addr(ea)) 610 stats->broadcast++; 611 else 612 stats->multicast++; 613 } 614 } 615 616 /* Optionally fill offload information in structure */ 617 static int 618 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) 619 { 620 struct rte_net_hdr_lens hdr_lens; 621 uint32_t hdrlen, ptype; 622 int l4_supported = 0; 623 624 /* nothing to do */ 625 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 626 return 0; 627 628 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 629 630 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 631 m->packet_type = ptype; 632 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 633 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 634 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 635 l4_supported = 1; 636 637 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 638 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 639 if (hdr->csum_start <= hdrlen && l4_supported) { 640 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 641 } else { 642 /* Unknown proto or tunnel, do sw cksum. We can assume 643 * the cksum field is in the first segment since the 644 * buffers we provided to the host are large enough. 645 * In case of SCTP, this will be wrong since it's a CRC 646 * but there's nothing we can do. 647 */ 648 uint16_t csum = 0, off; 649 650 rte_raw_cksum_mbuf(m, hdr->csum_start, 651 rte_pktmbuf_pkt_len(m) - hdr->csum_start, 652 &csum); 653 if (likely(csum != 0xffff)) 654 csum = ~csum; 655 off = hdr->csum_offset + hdr->csum_start; 656 if (rte_pktmbuf_data_len(m) >= off + 1) 657 *rte_pktmbuf_mtod_offset(m, uint16_t *, 658 off) = csum; 659 } 660 } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { 661 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; 662 } 663 664 /* GSO request, save required information in mbuf */ 665 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 666 /* Check unsupported modes */ 667 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || 668 (hdr->gso_size == 0)) { 669 return -EINVAL; 670 } 671 672 /* Update mss lengthes in mbuf */ 673 m->tso_segsz = hdr->gso_size; 674 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 675 case VIRTIO_NET_HDR_GSO_TCPV4: 676 case VIRTIO_NET_HDR_GSO_TCPV6: 677 m->ol_flags |= PKT_RX_LRO | \ 678 PKT_RX_L4_CKSUM_NONE; 679 break; 680 default: 681 return -EINVAL; 682 } 683 } 684 685 return 0; 686 } 687 688 static inline int 689 rx_offload_enabled(struct virtio_hw *hw) 690 { 691 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || 692 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 693 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); 694 } 695 696 #define VIRTIO_MBUF_BURST_SZ 64 697 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) 698 uint16_t 699 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 700 { 701 struct virtnet_rx *rxvq = rx_queue; 702 struct virtqueue *vq = rxvq->vq; 703 struct virtio_hw *hw = vq->hw; 704 struct rte_mbuf *rxm, *new_mbuf; 705 uint16_t nb_used, num, nb_rx; 706 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 707 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 708 int error; 709 uint32_t i, nb_enqueued; 710 uint32_t hdr_size; 711 int offload; 712 struct virtio_net_hdr *hdr; 713 714 nb_rx = 0; 715 if (unlikely(hw->started == 0)) 716 return nb_rx; 717 718 nb_used = VIRTQUEUE_NUSED(vq); 719 720 virtio_rmb(); 721 722 num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts; 723 if (unlikely(num > VIRTIO_MBUF_BURST_SZ)) 724 num = VIRTIO_MBUF_BURST_SZ; 725 if (likely(num > DESC_PER_CACHELINE)) 726 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); 727 728 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); 729 PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); 730 731 nb_enqueued = 0; 732 hdr_size = hw->vtnet_hdr_size; 733 offload = rx_offload_enabled(hw); 734 735 for (i = 0; i < num ; i++) { 736 rxm = rcv_pkts[i]; 737 738 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]); 739 740 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { 741 PMD_RX_LOG(ERR, "Packet drop"); 742 nb_enqueued++; 743 virtio_discard_rxbuf(vq, rxm); 744 rxvq->stats.errors++; 745 continue; 746 } 747 748 rxm->port = rxvq->port_id; 749 rxm->data_off = RTE_PKTMBUF_HEADROOM; 750 rxm->ol_flags = 0; 751 rxm->vlan_tci = 0; 752 753 rxm->pkt_len = (uint32_t)(len[i] - hdr_size); 754 rxm->data_len = (uint16_t)(len[i] - hdr_size); 755 756 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr + 757 RTE_PKTMBUF_HEADROOM - hdr_size); 758 759 if (hw->vlan_strip) 760 rte_vlan_strip(rxm); 761 762 if (offload && virtio_rx_offload(rxm, hdr) < 0) { 763 virtio_discard_rxbuf(vq, rxm); 764 rxvq->stats.errors++; 765 continue; 766 } 767 768 VIRTIO_DUMP_PACKET(rxm, rxm->data_len); 769 770 rx_pkts[nb_rx++] = rxm; 771 772 rxvq->stats.bytes += rxm->pkt_len; 773 virtio_update_packet_stats(&rxvq->stats, rxm); 774 } 775 776 rxvq->stats.packets += nb_rx; 777 778 /* Allocate new mbuf for the used descriptor */ 779 error = ENOSPC; 780 while (likely(!virtqueue_full(vq))) { 781 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 782 if (unlikely(new_mbuf == NULL)) { 783 struct rte_eth_dev *dev 784 = &rte_eth_devices[rxvq->port_id]; 785 dev->data->rx_mbuf_alloc_failed++; 786 break; 787 } 788 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 789 if (unlikely(error)) { 790 rte_pktmbuf_free(new_mbuf); 791 break; 792 } 793 nb_enqueued++; 794 } 795 796 if (likely(nb_enqueued)) { 797 vq_update_avail_idx(vq); 798 799 if (unlikely(virtqueue_kick_prepare(vq))) { 800 virtqueue_notify(vq); 801 PMD_RX_LOG(DEBUG, "Notified"); 802 } 803 } 804 805 return nb_rx; 806 } 807 808 uint16_t 809 virtio_recv_mergeable_pkts(void *rx_queue, 810 struct rte_mbuf **rx_pkts, 811 uint16_t nb_pkts) 812 { 813 struct virtnet_rx *rxvq = rx_queue; 814 struct virtqueue *vq = rxvq->vq; 815 struct virtio_hw *hw = vq->hw; 816 struct rte_mbuf *rxm, *new_mbuf; 817 uint16_t nb_used, num, nb_rx; 818 uint32_t len[VIRTIO_MBUF_BURST_SZ]; 819 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; 820 struct rte_mbuf *prev; 821 int error; 822 uint32_t i, nb_enqueued; 823 uint32_t seg_num; 824 uint16_t extra_idx; 825 uint32_t seg_res; 826 uint32_t hdr_size; 827 int offload; 828 829 nb_rx = 0; 830 if (unlikely(hw->started == 0)) 831 return nb_rx; 832 833 nb_used = VIRTQUEUE_NUSED(vq); 834 835 virtio_rmb(); 836 837 PMD_RX_LOG(DEBUG, "used:%d", nb_used); 838 839 i = 0; 840 nb_enqueued = 0; 841 seg_num = 0; 842 extra_idx = 0; 843 seg_res = 0; 844 hdr_size = hw->vtnet_hdr_size; 845 offload = rx_offload_enabled(hw); 846 847 while (i < nb_used) { 848 struct virtio_net_hdr_mrg_rxbuf *header; 849 850 if (nb_rx == nb_pkts) 851 break; 852 853 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); 854 if (num != 1) 855 continue; 856 857 i++; 858 859 PMD_RX_LOG(DEBUG, "dequeue:%d", num); 860 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); 861 862 rxm = rcv_pkts[0]; 863 864 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { 865 PMD_RX_LOG(ERR, "Packet drop"); 866 nb_enqueued++; 867 virtio_discard_rxbuf(vq, rxm); 868 rxvq->stats.errors++; 869 continue; 870 } 871 872 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr + 873 RTE_PKTMBUF_HEADROOM - hdr_size); 874 seg_num = header->num_buffers; 875 876 if (seg_num == 0) 877 seg_num = 1; 878 879 rxm->data_off = RTE_PKTMBUF_HEADROOM; 880 rxm->nb_segs = seg_num; 881 rxm->ol_flags = 0; 882 rxm->vlan_tci = 0; 883 rxm->pkt_len = (uint32_t)(len[0] - hdr_size); 884 rxm->data_len = (uint16_t)(len[0] - hdr_size); 885 886 rxm->port = rxvq->port_id; 887 rx_pkts[nb_rx] = rxm; 888 prev = rxm; 889 890 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { 891 virtio_discard_rxbuf(vq, rxm); 892 rxvq->stats.errors++; 893 continue; 894 } 895 896 seg_res = seg_num - 1; 897 898 while (seg_res != 0) { 899 /* 900 * Get extra segments for current uncompleted packet. 901 */ 902 uint16_t rcv_cnt = 903 RTE_MIN(seg_res, RTE_DIM(rcv_pkts)); 904 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { 905 uint32_t rx_num = 906 virtqueue_dequeue_burst_rx(vq, 907 rcv_pkts, len, rcv_cnt); 908 i += rx_num; 909 rcv_cnt = rx_num; 910 } else { 911 PMD_RX_LOG(ERR, 912 "No enough segments for packet."); 913 nb_enqueued++; 914 virtio_discard_rxbuf(vq, rxm); 915 rxvq->stats.errors++; 916 break; 917 } 918 919 extra_idx = 0; 920 921 while (extra_idx < rcv_cnt) { 922 rxm = rcv_pkts[extra_idx]; 923 924 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; 925 rxm->pkt_len = (uint32_t)(len[extra_idx]); 926 rxm->data_len = (uint16_t)(len[extra_idx]); 927 928 if (prev) 929 prev->next = rxm; 930 931 prev = rxm; 932 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len; 933 extra_idx++; 934 }; 935 seg_res -= rcv_cnt; 936 } 937 938 if (hw->vlan_strip) 939 rte_vlan_strip(rx_pkts[nb_rx]); 940 941 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], 942 rx_pkts[nb_rx]->data_len); 943 944 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; 945 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); 946 nb_rx++; 947 } 948 949 rxvq->stats.packets += nb_rx; 950 951 /* Allocate new mbuf for the used descriptor */ 952 error = ENOSPC; 953 while (likely(!virtqueue_full(vq))) { 954 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); 955 if (unlikely(new_mbuf == NULL)) { 956 struct rte_eth_dev *dev 957 = &rte_eth_devices[rxvq->port_id]; 958 dev->data->rx_mbuf_alloc_failed++; 959 break; 960 } 961 error = virtqueue_enqueue_recv_refill(vq, new_mbuf); 962 if (unlikely(error)) { 963 rte_pktmbuf_free(new_mbuf); 964 break; 965 } 966 nb_enqueued++; 967 } 968 969 if (likely(nb_enqueued)) { 970 vq_update_avail_idx(vq); 971 972 if (unlikely(virtqueue_kick_prepare(vq))) { 973 virtqueue_notify(vq); 974 PMD_RX_LOG(DEBUG, "Notified"); 975 } 976 } 977 978 return nb_rx; 979 } 980 981 uint16_t 982 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 983 { 984 struct virtnet_tx *txvq = tx_queue; 985 struct virtqueue *vq = txvq->vq; 986 struct virtio_hw *hw = vq->hw; 987 uint16_t hdr_size = hw->vtnet_hdr_size; 988 uint16_t nb_used, nb_tx = 0; 989 int error; 990 991 if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts)) 992 return nb_tx; 993 994 if (unlikely(nb_pkts < 1)) 995 return nb_pkts; 996 997 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); 998 nb_used = VIRTQUEUE_NUSED(vq); 999 1000 virtio_rmb(); 1001 if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) 1002 virtio_xmit_cleanup(vq, nb_used); 1003 1004 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { 1005 struct rte_mbuf *txm = tx_pkts[nb_tx]; 1006 int can_push = 0, use_indirect = 0, slots, need; 1007 1008 /* Do VLAN tag insertion */ 1009 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { 1010 error = rte_vlan_insert(&txm); 1011 if (unlikely(error)) { 1012 rte_pktmbuf_free(txm); 1013 continue; 1014 } 1015 } 1016 1017 /* optimize ring usage */ 1018 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || 1019 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && 1020 rte_mbuf_refcnt_read(txm) == 1 && 1021 RTE_MBUF_DIRECT(txm) && 1022 txm->nb_segs == 1 && 1023 rte_pktmbuf_headroom(txm) >= hdr_size && 1024 rte_is_aligned(rte_pktmbuf_mtod(txm, char *), 1025 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) 1026 can_push = 1; 1027 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) && 1028 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT) 1029 use_indirect = 1; 1030 1031 /* How many main ring entries are needed to this Tx? 1032 * any_layout => number of segments 1033 * indirect => 1 1034 * default => number of segments + 1 1035 */ 1036 slots = use_indirect ? 1 : (txm->nb_segs + !can_push); 1037 need = slots - vq->vq_free_cnt; 1038 1039 /* Positive value indicates it need free vring descriptors */ 1040 if (unlikely(need > 0)) { 1041 nb_used = VIRTQUEUE_NUSED(vq); 1042 virtio_rmb(); 1043 need = RTE_MIN(need, (int)nb_used); 1044 1045 virtio_xmit_cleanup(vq, need); 1046 need = slots - vq->vq_free_cnt; 1047 if (unlikely(need > 0)) { 1048 PMD_TX_LOG(ERR, 1049 "No free tx descriptors to transmit"); 1050 break; 1051 } 1052 } 1053 1054 /* Enqueue Packet buffers */ 1055 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); 1056 1057 txvq->stats.bytes += txm->pkt_len; 1058 virtio_update_packet_stats(&txvq->stats, txm); 1059 } 1060 1061 txvq->stats.packets += nb_tx; 1062 1063 if (likely(nb_tx)) { 1064 vq_update_avail_idx(vq); 1065 1066 if (unlikely(virtqueue_kick_prepare(vq))) { 1067 virtqueue_notify(vq); 1068 PMD_TX_LOG(DEBUG, "Notified backend after xmit"); 1069 } 1070 } 1071 1072 return nb_tx; 1073 } 1074