/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t)uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
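/*
 * Used TX descriptors are reclaimed lazily: virtio_xmit_pkts() runs the
 * cleanup below only once more than (ring size - tx_free_thresh) entries
 * are pending, or when a packet does not fit in the remaining free
 * descriptors. When the application leaves tx_free_thresh at zero it
 * defaults to RTE_MIN(ring size / 4, DEFAULT_TX_FREE_THRESH).
 */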
/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t)uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

/*
 * Add a single mbuf to the receive ring. The descriptor starts
 * vtnet_hdr_size bytes before the default data offset so the device can
 * write the virtio-net header into the mbuf headroom, and is marked
 * device-writable.
 */
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid the write operation when not necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

/*
 * Enqueue a single packet for transmission. The virtio-net header is either
 * pushed into the mbuf headroom (can_push), carried by the indirect
 * descriptor table of the reserved per-descriptor region (use_indirect), or
 * linked as a separate first descriptor pointing into that region. Checksum
 * and TSO fields are filled from the mbuf offload flags when TX offloads
 * have been negotiated.
 */
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* rte_pktmbuf_prepend() adds the header size to the packet
		 * length, which is not wanted here; the subtraction below
		 * restores the correct packet size.
		 */
		cookie->pkt_len -= head_size;
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}
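/*
 * Queue setup is split in two phases: the *_queue_setup() callbacks only
 * record the configuration (the virtqueues themselves are already allocated
 * in hw->vqs[] during device initialization), while the matching
 * *_queue_setup_finish() callbacks populate the rings when the device is
 * started.
 */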
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}
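/*
 * The simple TX path is disabled unless the queue is configured with
 * ETH_TXQ_FLAGS_NOMULTSEGS and ETH_TXQ_FLAGS_NOOFFLOADS. A tx_free_thresh of
 * zero falls back to RTE_MIN(ring size / 4, DEFAULT_TX_FREE_THRESH), and the
 * value must stay below the ring size minus 3.
 */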
/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	/* cannot use simple rxtx funcs with multisegs or offloads */
	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
		hw->use_simple_tx = 0;

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_tx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

/*
 * Update the per-queue size histogram: bin 0 holds packets shorter than
 * 64 bytes, bin 1 exactly 64 bytes, bins 2-5 the power-of-two ranges up to
 * 1023 bytes, bin 6 packets of 1024-1518 bytes and bin 7 anything larger.
 */
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure.
 *
 * Packets flagged VIRTIO_NET_HDR_F_NEEDS_CSUM are marked
 * PKT_RX_L4_CKSUM_NONE when the L4 protocol is recognised, or have their
 * checksum completed in software otherwise (unknown protocol or tunnel);
 * DATA_VALID maps to PKT_RX_L4_CKSUM_GOOD and GSO information is exported
 * as LRO metadata.
 */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
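/*
 * Mergeable receive: the num_buffers field of the virtio-net header tells
 * how many descriptors the device consumed for one packet; the extra
 * buffers are dequeued and chained onto the first mbuf as segments.
 */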
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
						rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
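/*
 * Transmit burst. VLAN tags are inserted in software when requested, and
 * each packet consumes one ring slot with an indirect descriptor, nb_segs
 * slots when the virtio-net header can be pushed into the headroom, or
 * nb_segs + 1 slots otherwise.
 */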
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				__alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* A positive value indicates we need to free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}