/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
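/*
 * Note: completed TX mbufs are reclaimed lazily. virtio_xmit_pkts() only
 * calls the cleanup helper below once the number of used, not-yet-reclaimed
 * descriptors exceeds vq_nentries - vq_free_thresh, or when the ring does
 * not have enough free slots for the packet being enqueued.
 */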
/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}
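/*
 * Post a single mbuf as a receive buffer. The descriptor address points
 * hdr_size bytes before the mbuf data area, so the device writes the
 * virtio-net header into the headroom immediately in front of the packet
 * data; the descriptor length covers the header plus the remaining buffer.
 */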
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid an unnecessary write operation, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)
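/*
 * Enqueue one packet for transmission. Three descriptor layouts are used,
 * selected by the caller:
 *  - can_push: the virtio-net header is prepended into the mbuf headroom,
 *    so only one descriptor per segment is needed;
 *  - use_indirect: a single ring slot points at an indirect descriptor
 *    table (header + all segments) kept in the reserved memzone;
 *  - default: the first slot carries the header from the reserved region
 *    and each segment takes one additional slot.
 */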
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() counts the hdr size to the pkt length,
		 * which is wrong. Below subtract restores correct pkt size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		rte_spinlock_init(&hw->cvq->lock);
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}
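/*
 * Receive and transmit queues share the virtio queue index space: for
 * ethdev queue N, the RX virtqueue is 2 * N + VTNET_SQ_RQ_QUEUE_IDX and
 * the TX virtqueue is 2 * N + VTNET_SQ_TQ_QUEUE_IDX, as computed by the
 * setup helpers below.
 */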
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}

		virtio_rxq_vec_setup(rxvq);
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	if (hw->use_simple_rx) {
		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
			virtio_rxq_rearm_vec(rxvq);
			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
		}
	} else {
		while (!virtqueue_full(vq)) {
			m = rte_mbuf_raw_alloc(rxvq->mpool);
			if (m == NULL)
				break;

			/* Enqueue allocated buffers */
			error = virtqueue_enqueue_recv_refill(vq, m);
			if (error) {
				rte_pktmbuf_free(m);
				break;
			}
			nbufs++;
		}

		vq_update_avail_idx(vq);
	}

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	VIRTQUEUE_DUMP(vq);

	return 0;
}
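/*
 * Note: the simple (vectorized) TX path is only kept if the queue is
 * configured with both ETH_TXQ_FLAGS_NOMULTSEGS and ETH_TXQ_FLAGS_NOOFFLOADS
 * (see VIRTIO_SIMPLE_FLAGS above); otherwise use_simple_tx is cleared in the
 * setup function below.
 */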
/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}
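/*
 * Helper used by the receive paths when a dequeued packet has to be dropped
 * (truncated frame or invalid offload metadata): the mbuf is handed straight
 * back to the device as a fresh receive buffer instead of being returned to
 * the mempool.
 */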
static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}
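/*
 * The helper below translates the virtio-net header received from the
 * device into mbuf metadata: packet type, checksum status flags and, for
 * GSO packets, LRO information. A negative return value means the header
 * describes something unsupported and the caller drops the packet.
 */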
/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO |
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
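/*
 * Burst receive for the non-mergeable case: read the number of used
 * descriptors, dequeue at most nb_pkts of them (capped to
 * VIRTIO_MBUF_BURST_SZ and trimmed so the consumer index ends up aligned to
 * DESC_PER_CACHELINE), strip the virtio-net header from each mbuf, then
 * refill the ring with fresh mbufs and notify the device if it requested a
 * kick.
 */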
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
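/*
 * Burst receive for mergeable RX buffers: the device may spread one packet
 * over several descriptors. num_buffers in the virtio-net header tells how
 * many buffers the packet occupies; the extra buffers are dequeued and
 * chained as additional mbuf segments behind the first one.
 */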
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"No enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
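/*
 * Burst transmit: reclaim completed descriptors when the ring is getting
 * full (and again, on demand, if a packet does not fit), then for each
 * packet pick the cheapest descriptor layout (header pushed into the mbuf,
 * indirect table, or one slot per segment plus a header slot), enqueue it,
 * update the avail index and kick the device if it asked for notifications.
 */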
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* Positive value indicates it needs free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}