/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

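/* Default value for tx_free_thresh when the application leaves it at
 * zero; virtio_dev_tx_queue_setup() further caps it at a quarter of
 * the ring size.
 */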
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}


static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
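	/* The descriptor starts at the virtio-net header location, just
	 * before the mbuf data area, and covers the rest of the buffer so
	 * the device can write the header and the packet in one go.
	 */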
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum, this time including
		 * ip_paylen
		 */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid unnecessary write operations, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

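/* Enqueue a single mbuf chain for transmit. The virtio-net header is
 * either pushed into the mbuf headroom (can_push), stored in the
 * reserved region and referenced through an indirect descriptor table
 * (use_indirect), or given a descriptor of its own in the ring.
 */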
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() counts the hdr size in the pkt
		 * length, which is wrong. The subtraction below restores
		 * the correct pkt size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

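	/* Fill one descriptor per mbuf segment; every descriptor except
	 * the last one is flagged VRING_DESC_F_NEXT to chain the segments.
	 */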
	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		rte_spinlock_init(&hw->cvq->lock);
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}

	if ((rx_conf->offloads ^ rxmode->offloads) &
	    VIRTIO_PMD_PER_DEVICE_RX_OFFLOADS)
		return -EINVAL;

	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}

		virtio_rxq_vec_setup(rxvq);
	}

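	/* The sw_ring entries past the real descriptors point at a zeroed
	 * fake mbuf, so reads beyond the last descriptor (e.g. by the
	 * vectorized RX path) stay harmless.
	 */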
	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	if (hw->use_simple_rx) {
		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
			virtio_rxq_rearm_vec(rxvq);
			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
		}
	} else {
		while (!virtqueue_full(vq)) {
			m = rte_mbuf_raw_alloc(rxvq->mpool);
			if (m == NULL)
				break;

			/* Enqueue allocated buffers */
			error = virtqueue_enqueue_recv_refill(vq, m);
			if (error) {
				rte_pktmbuf_free(m);
				break;
			}
			nbufs++;
		}

		vq_update_avail_idx(vq);
	}

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	VIRTQUEUE_DUMP(vq);

	return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if (tx_conf->offloads)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;
	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

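/* Update the per-queue size histogram and multicast/broadcast counters
 * for one packet. Bins: [0] < 64, [1] == 64, [2]..[5] 65-1023 grouped
 * by power of two, [6] 1024-1518, [7] >= 1519 bytes.
 */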
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count leading zeros to offset into the correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss length in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO |
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

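	/* Clamp the burst to the used entries and the local receive array,
	 * then trim it so this dequeue stops at a used-ring cache line
	 * boundary.
	 */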
	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

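	/* Each packet starts with a virtio_net_hdr_mrg_rxbuf whose
	 * num_buffers field gives the total descriptor count for the
	 * packet; the extra segments are dequeued and chained below.
	 */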
	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
						rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage: push the virtio-net header into the
		 * mbuf headroom when the device accepts any layout and the
		 * packet is a single, unshared, direct segment with enough
		 * aligned headroom; otherwise use an indirect descriptor
		 * table when available.
		 */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
			__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect => 1
		 * default => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* A positive value indicates that more free vring
		 * descriptors are needed.
		 */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}