1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_dmadev.h> 15 #include <rte_vhost.h> 16 #include <rte_tcp.h> 17 #include <rte_udp.h> 18 #include <rte_sctp.h> 19 #include <rte_arp.h> 20 #include <rte_spinlock.h> 21 #include <rte_malloc.h> 22 #include <rte_vhost_async.h> 23 24 #include "iotlb.h" 25 #include "vhost.h" 26 27 #define MAX_BATCH_LEN 256 28 29 static __rte_always_inline uint16_t 30 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 31 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 32 uint16_t vchan_id, bool legacy_ol_flags); 33 34 /* DMA device copy operation tracking array. */ 35 struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX]; 36 37 static __rte_always_inline bool 38 rxvq_is_mergeable(struct virtio_net *dev) 39 { 40 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 41 } 42 43 static __rte_always_inline bool 44 virtio_net_is_inorder(struct virtio_net *dev) 45 { 46 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 47 } 48 49 static bool 50 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 51 { 52 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 53 } 54 55 /* 56 * This function must be called with virtqueue's access_lock taken. 57 */ 58 static inline void 59 vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq, 60 struct rte_mbuf **pkts, uint16_t count) 61 { 62 struct virtqueue_stats *stats = &vq->stats; 63 int i; 64 65 if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED)) 66 return; 67 68 for (i = 0; i < count; i++) { 69 struct rte_ether_addr *ea; 70 struct rte_mbuf *pkt = pkts[i]; 71 uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt); 72 73 stats->packets++; 74 stats->bytes += pkt_len; 75 76 if (pkt_len == 64) { 77 stats->size_bins[1]++; 78 } else if (pkt_len > 64 && pkt_len < 1024) { 79 uint32_t bin; 80 81 /* count zeros, and offset into correct bin */ 82 bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5; 83 stats->size_bins[bin]++; 84 } else { 85 if (pkt_len < 64) 86 stats->size_bins[0]++; 87 else if (pkt_len < 1519) 88 stats->size_bins[6]++; 89 else 90 stats->size_bins[7]++; 91 } 92 93 ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *); 94 if (rte_is_multicast_ether_addr(ea)) { 95 if (rte_is_broadcast_ether_addr(ea)) 96 stats->broadcast++; 97 else 98 stats->multicast++; 99 } 100 } 101 } 102 103 static __rte_always_inline int64_t 104 vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, 105 int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, 106 struct vhost_iov_iter *pkt) 107 { 108 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 109 uint16_t ring_mask = dma_info->ring_mask; 110 static bool vhost_async_dma_copy_log; 111 112 113 struct vhost_iovec *iov = pkt->iov; 114 int copy_idx = 0; 115 uint32_t nr_segs = pkt->nr_segs; 116 uint16_t i; 117 118 if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs) 119 return -1; 120 121 for (i = 0; i < nr_segs; i++) { 122 copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr, 123 (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); 124 /** 125 * Since all memory is pinned and DMA vChannel 126 * ring has enough space, failure should be a 127 * rare case. 
If failure happens, it means DMA 128 * device encounters serious errors; in this 129 * case, please stop async data-path and check 130 * what has happened to DMA device. 131 */ 132 if (unlikely(copy_idx < 0)) { 133 if (!vhost_async_dma_copy_log) { 134 VHOST_LOG_DATA(ERR, "(%s) DMA copy failed for channel %d:%u\n", 135 dev->ifname, dma_id, vchan_id); 136 vhost_async_dma_copy_log = true; 137 } 138 return -1; 139 } 140 } 141 142 /** 143 * Only store packet completion flag address in the last copy's 144 * slot, and other slots are set to NULL. 145 */ 146 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx]; 147 148 return nr_segs; 149 } 150 151 static __rte_always_inline uint16_t 152 vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, 153 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, 154 struct vhost_iov_iter *pkts, uint16_t nr_pkts) 155 { 156 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 157 int64_t ret, nr_copies = 0; 158 uint16_t pkt_idx; 159 160 rte_spinlock_lock(&dma_info->dma_lock); 161 162 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { 163 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, 164 &pkts[pkt_idx]); 165 if (unlikely(ret < 0)) 166 break; 167 168 nr_copies += ret; 169 head_idx++; 170 if (head_idx >= vq->size) 171 head_idx -= vq->size; 172 } 173 174 if (likely(nr_copies > 0)) 175 rte_dma_submit(dma_id, vchan_id); 176 177 rte_spinlock_unlock(&dma_info->dma_lock); 178 179 return pkt_idx; 180 } 181 182 static __rte_always_inline uint16_t 183 vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id, 184 uint16_t max_pkts) 185 { 186 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 187 uint16_t ring_mask = dma_info->ring_mask; 188 uint16_t last_idx = 0; 189 uint16_t nr_copies; 190 uint16_t copy_idx; 191 uint16_t i; 192 bool has_error = false; 193 static bool vhost_async_dma_complete_log; 194 195 rte_spinlock_lock(&dma_info->dma_lock); 196 197 /** 198 * Print error log for debugging, if DMA reports error during 199 * DMA transfer. We do not handle error in vhost level. 200 */ 201 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error); 202 if (unlikely(!vhost_async_dma_complete_log && has_error)) { 203 VHOST_LOG_DATA(ERR, "(%s) DMA completion failure on channel %d:%u\n", dev->ifname, 204 dma_id, vchan_id); 205 vhost_async_dma_complete_log = true; 206 } else if (nr_copies == 0) { 207 goto out; 208 } 209 210 copy_idx = last_idx - nr_copies + 1; 211 for (i = 0; i < nr_copies; i++) { 212 bool *flag; 213 214 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask]; 215 if (flag) { 216 /** 217 * Mark the packet flag as received. The flag 218 * could belong to another virtqueue but write 219 * is atomic. 
220 */ 221 *flag = true; 222 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 223 } 224 copy_idx++; 225 } 226 227 out: 228 rte_spinlock_unlock(&dma_info->dma_lock); 229 return nr_copies; 230 } 231 232 static inline void 233 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 234 { 235 struct batch_copy_elem *elem = vq->batch_copy_elems; 236 uint16_t count = vq->batch_copy_nb_elems; 237 int i; 238 239 for (i = 0; i < count; i++) { 240 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 241 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 242 elem[i].len); 243 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 244 } 245 246 vq->batch_copy_nb_elems = 0; 247 } 248 249 static inline void 250 do_data_copy_dequeue(struct vhost_virtqueue *vq) 251 { 252 struct batch_copy_elem *elem = vq->batch_copy_elems; 253 uint16_t count = vq->batch_copy_nb_elems; 254 int i; 255 256 for (i = 0; i < count; i++) 257 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 258 259 vq->batch_copy_nb_elems = 0; 260 } 261 262 static __rte_always_inline void 263 do_flush_shadow_used_ring_split(struct virtio_net *dev, 264 struct vhost_virtqueue *vq, 265 uint16_t to, uint16_t from, uint16_t size) 266 { 267 rte_memcpy(&vq->used->ring[to], 268 &vq->shadow_used_split[from], 269 size * sizeof(struct vring_used_elem)); 270 vhost_log_cache_used_vring(dev, vq, 271 offsetof(struct vring_used, ring[to]), 272 size * sizeof(struct vring_used_elem)); 273 } 274 275 static __rte_always_inline void 276 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 277 { 278 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 279 280 if (used_idx + vq->shadow_used_idx <= vq->size) { 281 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 282 vq->shadow_used_idx); 283 } else { 284 uint16_t size; 285 286 /* update used ring interval [used_idx, vq->size] */ 287 size = vq->size - used_idx; 288 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 289 290 /* update the left half used ring interval [0, left_size] */ 291 do_flush_shadow_used_ring_split(dev, vq, 0, size, 292 vq->shadow_used_idx - size); 293 } 294 vq->last_used_idx += vq->shadow_used_idx; 295 296 vhost_log_cache_sync(dev, vq); 297 298 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 299 __ATOMIC_RELEASE); 300 vq->shadow_used_idx = 0; 301 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 302 sizeof(vq->used->idx)); 303 } 304 305 static __rte_always_inline void 306 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 307 uint16_t desc_idx, uint32_t len) 308 { 309 uint16_t i = vq->shadow_used_idx++; 310 311 vq->shadow_used_split[i].id = desc_idx; 312 vq->shadow_used_split[i].len = len; 313 } 314 315 static __rte_always_inline void 316 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 317 struct vhost_virtqueue *vq) 318 { 319 int i; 320 uint16_t used_idx = vq->last_used_idx; 321 uint16_t head_idx = vq->last_used_idx; 322 uint16_t head_flags = 0; 323 324 /* Split loop in two to save memory barriers */ 325 for (i = 0; i < vq->shadow_used_idx; i++) { 326 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 327 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 328 329 used_idx += vq->shadow_used_packed[i].count; 330 if (used_idx >= vq->size) 331 used_idx -= vq->size; 332 } 333 334 /* The ordering for storing desc flags needs to be enforced. 
*/ 335 rte_atomic_thread_fence(__ATOMIC_RELEASE); 336 337 for (i = 0; i < vq->shadow_used_idx; i++) { 338 uint16_t flags; 339 340 if (vq->shadow_used_packed[i].len) 341 flags = VRING_DESC_F_WRITE; 342 else 343 flags = 0; 344 345 if (vq->used_wrap_counter) { 346 flags |= VRING_DESC_F_USED; 347 flags |= VRING_DESC_F_AVAIL; 348 } else { 349 flags &= ~VRING_DESC_F_USED; 350 flags &= ~VRING_DESC_F_AVAIL; 351 } 352 353 if (i > 0) { 354 vq->desc_packed[vq->last_used_idx].flags = flags; 355 356 vhost_log_cache_used_vring(dev, vq, 357 vq->last_used_idx * 358 sizeof(struct vring_packed_desc), 359 sizeof(struct vring_packed_desc)); 360 } else { 361 head_idx = vq->last_used_idx; 362 head_flags = flags; 363 } 364 365 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 366 } 367 368 vq->desc_packed[head_idx].flags = head_flags; 369 370 vhost_log_cache_used_vring(dev, vq, 371 head_idx * 372 sizeof(struct vring_packed_desc), 373 sizeof(struct vring_packed_desc)); 374 375 vq->shadow_used_idx = 0; 376 vhost_log_cache_sync(dev, vq); 377 } 378 379 static __rte_always_inline void 380 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 381 struct vhost_virtqueue *vq) 382 { 383 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 384 385 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 386 /* desc flags is the synchronization point for virtio packed vring */ 387 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 388 used_elem->flags, __ATOMIC_RELEASE); 389 390 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 391 sizeof(struct vring_packed_desc), 392 sizeof(struct vring_packed_desc)); 393 vq->shadow_used_idx = 0; 394 vhost_log_cache_sync(dev, vq); 395 } 396 397 static __rte_always_inline void 398 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 399 struct vhost_virtqueue *vq, 400 uint64_t *lens, 401 uint16_t *ids) 402 { 403 uint16_t i; 404 uint16_t flags; 405 uint16_t last_used_idx; 406 struct vring_packed_desc *desc_base; 407 408 last_used_idx = vq->last_used_idx; 409 desc_base = &vq->desc_packed[last_used_idx]; 410 411 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 412 413 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 414 desc_base[i].id = ids[i]; 415 desc_base[i].len = lens[i]; 416 } 417 418 rte_atomic_thread_fence(__ATOMIC_RELEASE); 419 420 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 421 desc_base[i].flags = flags; 422 } 423 424 vhost_log_cache_used_vring(dev, vq, last_used_idx * 425 sizeof(struct vring_packed_desc), 426 sizeof(struct vring_packed_desc) * 427 PACKED_BATCH_SIZE); 428 vhost_log_cache_sync(dev, vq); 429 430 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 431 } 432 433 static __rte_always_inline void 434 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 435 uint16_t id) 436 { 437 vq->shadow_used_packed[0].id = id; 438 439 if (!vq->shadow_used_idx) { 440 vq->shadow_last_used_idx = vq->last_used_idx; 441 vq->shadow_used_packed[0].flags = 442 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 443 vq->shadow_used_packed[0].len = 0; 444 vq->shadow_used_packed[0].count = 1; 445 vq->shadow_used_idx++; 446 } 447 448 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 449 } 450 451 static __rte_always_inline void 452 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 453 struct vhost_virtqueue *vq, 454 uint16_t *ids) 455 { 456 uint16_t flags; 457 uint16_t i; 458 uint16_t begin; 459 460 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 461 462 if 
(!vq->shadow_used_idx) { 463 vq->shadow_last_used_idx = vq->last_used_idx; 464 vq->shadow_used_packed[0].id = ids[0]; 465 vq->shadow_used_packed[0].len = 0; 466 vq->shadow_used_packed[0].count = 1; 467 vq->shadow_used_packed[0].flags = flags; 468 vq->shadow_used_idx++; 469 begin = 1; 470 } else 471 begin = 0; 472 473 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 474 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 475 vq->desc_packed[vq->last_used_idx + i].len = 0; 476 } 477 478 rte_atomic_thread_fence(__ATOMIC_RELEASE); 479 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 480 vq->desc_packed[vq->last_used_idx + i].flags = flags; 481 482 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 483 sizeof(struct vring_packed_desc), 484 sizeof(struct vring_packed_desc) * 485 PACKED_BATCH_SIZE); 486 vhost_log_cache_sync(dev, vq); 487 488 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 489 } 490 491 static __rte_always_inline void 492 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 493 uint16_t buf_id, 494 uint16_t count) 495 { 496 uint16_t flags; 497 498 flags = vq->desc_packed[vq->last_used_idx].flags; 499 if (vq->used_wrap_counter) { 500 flags |= VRING_DESC_F_USED; 501 flags |= VRING_DESC_F_AVAIL; 502 } else { 503 flags &= ~VRING_DESC_F_USED; 504 flags &= ~VRING_DESC_F_AVAIL; 505 } 506 507 if (!vq->shadow_used_idx) { 508 vq->shadow_last_used_idx = vq->last_used_idx; 509 510 vq->shadow_used_packed[0].id = buf_id; 511 vq->shadow_used_packed[0].len = 0; 512 vq->shadow_used_packed[0].flags = flags; 513 vq->shadow_used_idx++; 514 } else { 515 vq->desc_packed[vq->last_used_idx].id = buf_id; 516 vq->desc_packed[vq->last_used_idx].len = 0; 517 vq->desc_packed[vq->last_used_idx].flags = flags; 518 } 519 520 vq_inc_last_used_packed(vq, count); 521 } 522 523 static __rte_always_inline void 524 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 525 uint16_t buf_id, 526 uint16_t count) 527 { 528 uint16_t flags; 529 530 vq->shadow_used_packed[0].id = buf_id; 531 532 flags = vq->desc_packed[vq->last_used_idx].flags; 533 if (vq->used_wrap_counter) { 534 flags |= VRING_DESC_F_USED; 535 flags |= VRING_DESC_F_AVAIL; 536 } else { 537 flags &= ~VRING_DESC_F_USED; 538 flags &= ~VRING_DESC_F_AVAIL; 539 } 540 541 if (!vq->shadow_used_idx) { 542 vq->shadow_last_used_idx = vq->last_used_idx; 543 vq->shadow_used_packed[0].len = 0; 544 vq->shadow_used_packed[0].flags = flags; 545 vq->shadow_used_idx++; 546 } 547 548 vq_inc_last_used_packed(vq, count); 549 } 550 551 static __rte_always_inline void 552 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 553 uint32_t *len, 554 uint16_t *id, 555 uint16_t *count, 556 uint16_t num_buffers) 557 { 558 uint16_t i; 559 560 for (i = 0; i < num_buffers; i++) { 561 /* enqueue shadow flush action aligned with batch num */ 562 if (!vq->shadow_used_idx) 563 vq->shadow_aligned_idx = vq->last_used_idx & 564 PACKED_BATCH_MASK; 565 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 566 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 567 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 568 vq->shadow_aligned_idx += count[i]; 569 vq->shadow_used_idx++; 570 } 571 } 572 573 static __rte_always_inline void 574 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 575 struct vhost_virtqueue *vq, 576 uint32_t *len, 577 uint16_t *id, 578 uint16_t *count, 579 uint16_t num_buffers) 580 { 581 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 582 583 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) 
{ 584 do_data_copy_enqueue(dev, vq); 585 vhost_flush_enqueue_shadow_packed(dev, vq); 586 } 587 } 588 589 /* avoid write operation when necessary, to lessen cache issues */ 590 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 591 if ((var) != (val)) \ 592 (var) = (val); \ 593 } while (0) 594 595 static __rte_always_inline void 596 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 597 { 598 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK; 599 600 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) 601 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM; 602 603 if (csum_l4) { 604 /* 605 * Pseudo-header checksum must be set as per Virtio spec. 606 * 607 * Note: We don't propagate rte_net_intel_cksum_prepare() 608 * errors, as it would have an impact on performance, and an 609 * error would mean the packet is dropped by the guest instead 610 * of being dropped here. 611 */ 612 rte_net_intel_cksum_prepare(m_buf); 613 614 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 615 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 616 617 switch (csum_l4) { 618 case RTE_MBUF_F_TX_TCP_CKSUM: 619 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 620 cksum)); 621 break; 622 case RTE_MBUF_F_TX_UDP_CKSUM: 623 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 624 dgram_cksum)); 625 break; 626 case RTE_MBUF_F_TX_SCTP_CKSUM: 627 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 628 cksum)); 629 break; 630 } 631 } else { 632 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 633 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 634 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 635 } 636 637 /* IP cksum verification cannot be bypassed, then calculate here */ 638 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 639 struct rte_ipv4_hdr *ipv4_hdr; 640 641 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 642 m_buf->l2_len); 643 ipv4_hdr->hdr_checksum = 0; 644 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 645 } 646 647 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 648 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 649 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 650 else 651 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 652 net_hdr->gso_size = m_buf->tso_segsz; 653 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 654 + m_buf->l4_len; 655 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 656 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 657 net_hdr->gso_size = m_buf->tso_segsz; 658 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 659 m_buf->l4_len; 660 } else { 661 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 662 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 663 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 664 } 665 } 666 667 static __rte_always_inline int 668 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 669 struct buf_vector *buf_vec, uint16_t *vec_idx, 670 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 671 { 672 uint16_t vec_id = *vec_idx; 673 674 while (desc_len) { 675 uint64_t desc_addr; 676 uint64_t desc_chunck_len = desc_len; 677 678 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 679 return -1; 680 681 desc_addr = vhost_iova_to_vva(dev, vq, 682 desc_iova, 683 &desc_chunck_len, 684 perm); 685 if (unlikely(!desc_addr)) 686 return -1; 687 688 rte_prefetch0((void *)(uintptr_t)desc_addr); 689 690 buf_vec[vec_id].buf_iova = desc_iova; 691 buf_vec[vec_id].buf_addr = desc_addr; 692 buf_vec[vec_id].buf_len = desc_chunck_len; 693 694 desc_len -= desc_chunck_len; 695 desc_iova += desc_chunck_len; 696 vec_id++; 697 } 698 *vec_idx = vec_id; 699 700 return 0; 701 } 702 703 static 
__rte_always_inline int 704 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 705 uint32_t avail_idx, uint16_t *vec_idx, 706 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 707 uint32_t *desc_chain_len, uint8_t perm) 708 { 709 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 710 uint16_t vec_id = *vec_idx; 711 uint32_t len = 0; 712 uint64_t dlen; 713 uint32_t nr_descs = vq->size; 714 uint32_t cnt = 0; 715 struct vring_desc *descs = vq->desc; 716 struct vring_desc *idesc = NULL; 717 718 if (unlikely(idx >= vq->size)) 719 return -1; 720 721 *desc_chain_head = idx; 722 723 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 724 dlen = vq->desc[idx].len; 725 nr_descs = dlen / sizeof(struct vring_desc); 726 if (unlikely(nr_descs > vq->size)) 727 return -1; 728 729 descs = (struct vring_desc *)(uintptr_t) 730 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 731 &dlen, 732 VHOST_ACCESS_RO); 733 if (unlikely(!descs)) 734 return -1; 735 736 if (unlikely(dlen < vq->desc[idx].len)) { 737 /* 738 * The indirect desc table is not contiguous 739 * in process VA space, we have to copy it. 740 */ 741 idesc = vhost_alloc_copy_ind_table(dev, vq, 742 vq->desc[idx].addr, vq->desc[idx].len); 743 if (unlikely(!idesc)) 744 return -1; 745 746 descs = idesc; 747 } 748 749 idx = 0; 750 } 751 752 while (1) { 753 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 754 free_ind_table(idesc); 755 return -1; 756 } 757 758 dlen = descs[idx].len; 759 len += dlen; 760 761 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 762 descs[idx].addr, dlen, 763 perm))) { 764 free_ind_table(idesc); 765 return -1; 766 } 767 768 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 769 break; 770 771 idx = descs[idx].next; 772 } 773 774 *desc_chain_len = len; 775 *vec_idx = vec_id; 776 777 if (unlikely(!!idesc)) 778 free_ind_table(idesc); 779 780 return 0; 781 } 782 783 /* 784 * Returns -1 on fail, 0 on success 785 */ 786 static inline int 787 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 788 uint32_t size, struct buf_vector *buf_vec, 789 uint16_t *num_buffers, uint16_t avail_head, 790 uint16_t *nr_vec) 791 { 792 uint16_t cur_idx; 793 uint16_t vec_idx = 0; 794 uint16_t max_tries, tries = 0; 795 796 uint16_t head_idx = 0; 797 uint32_t len = 0; 798 799 *num_buffers = 0; 800 cur_idx = vq->last_avail_idx; 801 802 if (rxvq_is_mergeable(dev)) 803 max_tries = vq->size - 1; 804 else 805 max_tries = 1; 806 807 while (size > 0) { 808 if (unlikely(cur_idx == avail_head)) 809 return -1; 810 /* 811 * if we tried all available ring items, and still 812 * can't get enough buf, it means something abnormal 813 * happened. 
814 */ 815 if (unlikely(++tries > max_tries)) 816 return -1; 817 818 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 819 &vec_idx, buf_vec, 820 &head_idx, &len, 821 VHOST_ACCESS_RW) < 0)) 822 return -1; 823 len = RTE_MIN(len, size); 824 update_shadow_used_ring_split(vq, head_idx, len); 825 size -= len; 826 827 cur_idx++; 828 *num_buffers += 1; 829 } 830 831 *nr_vec = vec_idx; 832 833 return 0; 834 } 835 836 static __rte_always_inline int 837 fill_vec_buf_packed_indirect(struct virtio_net *dev, 838 struct vhost_virtqueue *vq, 839 struct vring_packed_desc *desc, uint16_t *vec_idx, 840 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 841 { 842 uint16_t i; 843 uint32_t nr_descs; 844 uint16_t vec_id = *vec_idx; 845 uint64_t dlen; 846 struct vring_packed_desc *descs, *idescs = NULL; 847 848 dlen = desc->len; 849 descs = (struct vring_packed_desc *)(uintptr_t) 850 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 851 if (unlikely(!descs)) 852 return -1; 853 854 if (unlikely(dlen < desc->len)) { 855 /* 856 * The indirect desc table is not contiguous 857 * in process VA space, we have to copy it. 858 */ 859 idescs = vhost_alloc_copy_ind_table(dev, 860 vq, desc->addr, desc->len); 861 if (unlikely(!idescs)) 862 return -1; 863 864 descs = idescs; 865 } 866 867 nr_descs = desc->len / sizeof(struct vring_packed_desc); 868 if (unlikely(nr_descs >= vq->size)) { 869 free_ind_table(idescs); 870 return -1; 871 } 872 873 for (i = 0; i < nr_descs; i++) { 874 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 875 free_ind_table(idescs); 876 return -1; 877 } 878 879 dlen = descs[i].len; 880 *len += dlen; 881 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 882 descs[i].addr, dlen, 883 perm))) 884 return -1; 885 } 886 *vec_idx = vec_id; 887 888 if (unlikely(!!idescs)) 889 free_ind_table(idescs); 890 891 return 0; 892 } 893 894 static __rte_always_inline int 895 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 896 uint16_t avail_idx, uint16_t *desc_count, 897 struct buf_vector *buf_vec, uint16_t *vec_idx, 898 uint16_t *buf_id, uint32_t *len, uint8_t perm) 899 { 900 bool wrap_counter = vq->avail_wrap_counter; 901 struct vring_packed_desc *descs = vq->desc_packed; 902 uint16_t vec_id = *vec_idx; 903 uint64_t dlen; 904 905 if (avail_idx < vq->last_avail_idx) 906 wrap_counter ^= 1; 907 908 /* 909 * Perform a load-acquire barrier in desc_is_avail to 910 * enforce the ordering between desc flags and desc 911 * content. 
912 */ 913 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 914 return -1; 915 916 *desc_count = 0; 917 *len = 0; 918 919 while (1) { 920 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 921 return -1; 922 923 if (unlikely(*desc_count >= vq->size)) 924 return -1; 925 926 *desc_count += 1; 927 *buf_id = descs[avail_idx].id; 928 929 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 930 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 931 &descs[avail_idx], 932 &vec_id, buf_vec, 933 len, perm) < 0)) 934 return -1; 935 } else { 936 dlen = descs[avail_idx].len; 937 *len += dlen; 938 939 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 940 descs[avail_idx].addr, 941 dlen, 942 perm))) 943 return -1; 944 } 945 946 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 947 break; 948 949 if (++avail_idx >= vq->size) { 950 avail_idx -= vq->size; 951 wrap_counter ^= 1; 952 } 953 } 954 955 *vec_idx = vec_id; 956 957 return 0; 958 } 959 960 static __rte_noinline void 961 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 962 struct buf_vector *buf_vec, 963 struct virtio_net_hdr_mrg_rxbuf *hdr) 964 { 965 uint64_t len; 966 uint64_t remain = dev->vhost_hlen; 967 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 968 uint64_t iova = buf_vec->buf_iova; 969 970 while (remain) { 971 len = RTE_MIN(remain, 972 buf_vec->buf_len); 973 dst = buf_vec->buf_addr; 974 rte_memcpy((void *)(uintptr_t)dst, 975 (void *)(uintptr_t)src, 976 len); 977 978 PRINT_PACKET(dev, (uintptr_t)dst, 979 (uint32_t)len, 0); 980 vhost_log_cache_write_iova(dev, vq, 981 iova, len); 982 983 remain -= len; 984 iova += len; 985 src += len; 986 buf_vec++; 987 } 988 } 989 990 static __rte_always_inline int 991 async_iter_initialize(struct virtio_net *dev, struct vhost_async *async) 992 { 993 struct vhost_iov_iter *iter; 994 995 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 996 VHOST_LOG_DATA(ERR, "(%s) no more async iovec available\n", dev->ifname); 997 return -1; 998 } 999 1000 iter = async->iov_iter + async->iter_idx; 1001 iter->iov = async->iovec + async->iovec_idx; 1002 iter->nr_segs = 0; 1003 1004 return 0; 1005 } 1006 1007 static __rte_always_inline int 1008 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1009 void *src, void *dst, size_t len) 1010 { 1011 struct vhost_iov_iter *iter; 1012 struct vhost_iovec *iovec; 1013 1014 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1015 static bool vhost_max_async_vec_log; 1016 1017 if (!vhost_max_async_vec_log) { 1018 VHOST_LOG_DATA(ERR, "(%s) no more async iovec available\n", dev->ifname); 1019 vhost_max_async_vec_log = true; 1020 } 1021 1022 return -1; 1023 } 1024 1025 iter = async->iov_iter + async->iter_idx; 1026 iovec = async->iovec + async->iovec_idx; 1027 1028 iovec->src_addr = src; 1029 iovec->dst_addr = dst; 1030 iovec->len = len; 1031 1032 iter->nr_segs++; 1033 async->iovec_idx++; 1034 1035 return 0; 1036 } 1037 1038 static __rte_always_inline void 1039 async_iter_finalize(struct vhost_async *async) 1040 { 1041 async->iter_idx++; 1042 } 1043 1044 static __rte_always_inline void 1045 async_iter_cancel(struct vhost_async *async) 1046 { 1047 struct vhost_iov_iter *iter; 1048 1049 iter = async->iov_iter + async->iter_idx; 1050 async->iovec_idx -= iter->nr_segs; 1051 iter->nr_segs = 0; 1052 iter->iov = NULL; 1053 } 1054 1055 static __rte_always_inline void 1056 async_iter_reset(struct vhost_async *async) 1057 { 1058 async->iter_idx = 0; 1059 async->iovec_idx = 0; 1060 } 1061 1062 static __rte_always_inline int 1063 
async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1064 struct rte_mbuf *m, uint32_t mbuf_offset, 1065 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1066 { 1067 struct vhost_async *async = vq->async; 1068 uint64_t mapped_len; 1069 uint32_t buf_offset = 0; 1070 void *src, *dst; 1071 void *host_iova; 1072 1073 while (cpy_len) { 1074 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1075 buf_iova + buf_offset, cpy_len, &mapped_len); 1076 if (unlikely(!host_iova)) { 1077 VHOST_LOG_DATA(ERR, "(%s) %s: failed to get host iova.\n", 1078 dev->ifname, __func__); 1079 return -1; 1080 } 1081 1082 if (to_desc) { 1083 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1084 dst = host_iova; 1085 } else { 1086 src = host_iova; 1087 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1088 } 1089 1090 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1091 return -1; 1092 1093 cpy_len -= (uint32_t)mapped_len; 1094 mbuf_offset += (uint32_t)mapped_len; 1095 buf_offset += (uint32_t)mapped_len; 1096 } 1097 1098 return 0; 1099 } 1100 1101 static __rte_always_inline void 1102 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1103 struct rte_mbuf *m, uint32_t mbuf_offset, 1104 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1105 { 1106 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1107 1108 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1109 if (to_desc) { 1110 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1111 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1112 cpy_len); 1113 } else { 1114 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1115 (void *)((uintptr_t)(buf_addr)), 1116 cpy_len); 1117 } 1118 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1119 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1120 } else { 1121 if (to_desc) { 1122 batch_copy[vq->batch_copy_nb_elems].dst = 1123 (void *)((uintptr_t)(buf_addr)); 1124 batch_copy[vq->batch_copy_nb_elems].src = 1125 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1126 } else { 1127 batch_copy[vq->batch_copy_nb_elems].dst = 1128 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1129 batch_copy[vq->batch_copy_nb_elems].src = 1130 (void *)((uintptr_t)(buf_addr)); 1131 } 1132 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1133 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1134 vq->batch_copy_nb_elems++; 1135 } 1136 } 1137 1138 static __rte_always_inline int 1139 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1140 struct rte_mbuf *m, struct buf_vector *buf_vec, 1141 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1142 { 1143 uint32_t vec_idx = 0; 1144 uint32_t mbuf_offset, mbuf_avail; 1145 uint32_t buf_offset, buf_avail; 1146 uint64_t buf_addr, buf_iova, buf_len; 1147 uint32_t cpy_len; 1148 uint64_t hdr_addr; 1149 struct rte_mbuf *hdr_mbuf; 1150 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1151 struct vhost_async *async = vq->async; 1152 1153 if (unlikely(m == NULL)) 1154 return -1; 1155 1156 buf_addr = buf_vec[vec_idx].buf_addr; 1157 buf_iova = buf_vec[vec_idx].buf_iova; 1158 buf_len = buf_vec[vec_idx].buf_len; 1159 1160 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1161 return -1; 1162 1163 hdr_mbuf = m; 1164 hdr_addr = buf_addr; 1165 if (unlikely(buf_len < dev->vhost_hlen)) { 1166 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1167 hdr = &tmp_hdr; 1168 } else 1169 hdr = (struct virtio_net_hdr_mrg_rxbuf 
*)(uintptr_t)hdr_addr; 1170 1171 VHOST_LOG_DATA(DEBUG, "(%s) RX: num merge buffers %d\n", 1172 dev->ifname, num_buffers); 1173 1174 if (unlikely(buf_len < dev->vhost_hlen)) { 1175 buf_offset = dev->vhost_hlen - buf_len; 1176 vec_idx++; 1177 buf_addr = buf_vec[vec_idx].buf_addr; 1178 buf_iova = buf_vec[vec_idx].buf_iova; 1179 buf_len = buf_vec[vec_idx].buf_len; 1180 buf_avail = buf_len - buf_offset; 1181 } else { 1182 buf_offset = dev->vhost_hlen; 1183 buf_avail = buf_len - dev->vhost_hlen; 1184 } 1185 1186 mbuf_avail = rte_pktmbuf_data_len(m); 1187 mbuf_offset = 0; 1188 1189 if (is_async) { 1190 if (async_iter_initialize(dev, async)) 1191 return -1; 1192 } 1193 1194 while (mbuf_avail != 0 || m->next != NULL) { 1195 /* done with current buf, get the next one */ 1196 if (buf_avail == 0) { 1197 vec_idx++; 1198 if (unlikely(vec_idx >= nr_vec)) 1199 goto error; 1200 1201 buf_addr = buf_vec[vec_idx].buf_addr; 1202 buf_iova = buf_vec[vec_idx].buf_iova; 1203 buf_len = buf_vec[vec_idx].buf_len; 1204 1205 buf_offset = 0; 1206 buf_avail = buf_len; 1207 } 1208 1209 /* done with current mbuf, get the next one */ 1210 if (mbuf_avail == 0) { 1211 m = m->next; 1212 1213 mbuf_offset = 0; 1214 mbuf_avail = rte_pktmbuf_data_len(m); 1215 } 1216 1217 if (hdr_addr) { 1218 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1219 if (rxvq_is_mergeable(dev)) 1220 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1221 num_buffers); 1222 1223 if (unlikely(hdr == &tmp_hdr)) { 1224 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1225 } else { 1226 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1227 dev->vhost_hlen, 0); 1228 vhost_log_cache_write_iova(dev, vq, 1229 buf_vec[0].buf_iova, 1230 dev->vhost_hlen); 1231 } 1232 1233 hdr_addr = 0; 1234 } 1235 1236 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1237 1238 if (is_async) { 1239 if (async_fill_seg(dev, vq, m, mbuf_offset, 1240 buf_iova + buf_offset, cpy_len, true) < 0) 1241 goto error; 1242 } else { 1243 sync_fill_seg(dev, vq, m, mbuf_offset, 1244 buf_addr + buf_offset, 1245 buf_iova + buf_offset, cpy_len, true); 1246 } 1247 1248 mbuf_avail -= cpy_len; 1249 mbuf_offset += cpy_len; 1250 buf_avail -= cpy_len; 1251 buf_offset += cpy_len; 1252 } 1253 1254 if (is_async) 1255 async_iter_finalize(async); 1256 1257 return 0; 1258 error: 1259 if (is_async) 1260 async_iter_cancel(async); 1261 1262 return -1; 1263 } 1264 1265 static __rte_always_inline int 1266 vhost_enqueue_single_packed(struct virtio_net *dev, 1267 struct vhost_virtqueue *vq, 1268 struct rte_mbuf *pkt, 1269 struct buf_vector *buf_vec, 1270 uint16_t *nr_descs) 1271 { 1272 uint16_t nr_vec = 0; 1273 uint16_t avail_idx = vq->last_avail_idx; 1274 uint16_t max_tries, tries = 0; 1275 uint16_t buf_id = 0; 1276 uint32_t len = 0; 1277 uint16_t desc_count; 1278 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1279 uint16_t num_buffers = 0; 1280 uint32_t buffer_len[vq->size]; 1281 uint16_t buffer_buf_id[vq->size]; 1282 uint16_t buffer_desc_count[vq->size]; 1283 1284 if (rxvq_is_mergeable(dev)) 1285 max_tries = vq->size - 1; 1286 else 1287 max_tries = 1; 1288 1289 while (size > 0) { 1290 /* 1291 * if we tried all available ring items, and still 1292 * can't get enough buf, it means something abnormal 1293 * happened. 
1294 */ 1295 if (unlikely(++tries > max_tries)) 1296 return -1; 1297 1298 if (unlikely(fill_vec_buf_packed(dev, vq, 1299 avail_idx, &desc_count, 1300 buf_vec, &nr_vec, 1301 &buf_id, &len, 1302 VHOST_ACCESS_RW) < 0)) 1303 return -1; 1304 1305 len = RTE_MIN(len, size); 1306 size -= len; 1307 1308 buffer_len[num_buffers] = len; 1309 buffer_buf_id[num_buffers] = buf_id; 1310 buffer_desc_count[num_buffers] = desc_count; 1311 num_buffers += 1; 1312 1313 *nr_descs += desc_count; 1314 avail_idx += desc_count; 1315 if (avail_idx >= vq->size) 1316 avail_idx -= vq->size; 1317 } 1318 1319 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0) 1320 return -1; 1321 1322 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1323 buffer_desc_count, num_buffers); 1324 1325 return 0; 1326 } 1327 1328 static __rte_noinline uint32_t 1329 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1330 struct rte_mbuf **pkts, uint32_t count) 1331 { 1332 uint32_t pkt_idx = 0; 1333 uint16_t num_buffers; 1334 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1335 uint16_t avail_head; 1336 1337 /* 1338 * The ordering between avail index and 1339 * desc reads needs to be enforced. 1340 */ 1341 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1342 1343 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1344 1345 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1346 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1347 uint16_t nr_vec = 0; 1348 1349 if (unlikely(reserve_avail_buf_split(dev, vq, 1350 pkt_len, buf_vec, &num_buffers, 1351 avail_head, &nr_vec) < 0)) { 1352 VHOST_LOG_DATA(DEBUG, 1353 "(%s) failed to get enough desc from vring\n", 1354 dev->ifname); 1355 vq->shadow_used_idx -= num_buffers; 1356 break; 1357 } 1358 1359 VHOST_LOG_DATA(DEBUG, "(%s) current index %d | end index %d\n", 1360 dev->ifname, vq->last_avail_idx, 1361 vq->last_avail_idx + num_buffers); 1362 1363 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1364 num_buffers, false) < 0) { 1365 vq->shadow_used_idx -= num_buffers; 1366 break; 1367 } 1368 1369 vq->last_avail_idx += num_buffers; 1370 } 1371 1372 do_data_copy_enqueue(dev, vq); 1373 1374 if (likely(vq->shadow_used_idx)) { 1375 flush_shadow_used_ring_split(dev, vq); 1376 vhost_vring_call_split(dev, vq); 1377 } 1378 1379 return pkt_idx; 1380 } 1381 1382 static __rte_always_inline int 1383 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1384 struct vhost_virtqueue *vq, 1385 struct rte_mbuf **pkts, 1386 uint64_t *desc_addrs, 1387 uint64_t *lens) 1388 { 1389 bool wrap_counter = vq->avail_wrap_counter; 1390 struct vring_packed_desc *descs = vq->desc_packed; 1391 uint16_t avail_idx = vq->last_avail_idx; 1392 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1393 uint16_t i; 1394 1395 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1396 return -1; 1397 1398 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1399 return -1; 1400 1401 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1402 if (unlikely(pkts[i]->next != NULL)) 1403 return -1; 1404 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1405 wrap_counter))) 1406 return -1; 1407 } 1408 1409 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1410 lens[i] = descs[avail_idx + i].len; 1411 1412 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1413 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1414 return -1; 1415 } 1416 1417 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1418 desc_addrs[i] = 
vhost_iova_to_vva(dev, vq, 1419 descs[avail_idx + i].addr, 1420 &lens[i], 1421 VHOST_ACCESS_RW); 1422 1423 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1424 if (unlikely(!desc_addrs[i])) 1425 return -1; 1426 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1427 return -1; 1428 } 1429 1430 return 0; 1431 } 1432 1433 static __rte_always_inline void 1434 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1435 struct vhost_virtqueue *vq, 1436 struct rte_mbuf **pkts, 1437 uint64_t *desc_addrs, 1438 uint64_t *lens) 1439 { 1440 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1441 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1442 struct vring_packed_desc *descs = vq->desc_packed; 1443 uint16_t avail_idx = vq->last_avail_idx; 1444 uint16_t ids[PACKED_BATCH_SIZE]; 1445 uint16_t i; 1446 1447 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1448 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1449 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1450 (uintptr_t)desc_addrs[i]; 1451 lens[i] = pkts[i]->pkt_len + 1452 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1453 } 1454 1455 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1456 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1457 1458 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1459 1460 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1461 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1462 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1463 pkts[i]->pkt_len); 1464 } 1465 1466 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1467 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1468 lens[i]); 1469 1470 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1471 ids[i] = descs[avail_idx + i].id; 1472 1473 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1474 } 1475 1476 static __rte_always_inline int 1477 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1478 struct vhost_virtqueue *vq, 1479 struct rte_mbuf **pkts) 1480 { 1481 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1482 uint64_t lens[PACKED_BATCH_SIZE]; 1483 1484 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1485 return -1; 1486 1487 if (vq->shadow_used_idx) { 1488 do_data_copy_enqueue(dev, vq); 1489 vhost_flush_enqueue_shadow_packed(dev, vq); 1490 } 1491 1492 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1493 1494 return 0; 1495 } 1496 1497 static __rte_always_inline int16_t 1498 virtio_dev_rx_single_packed(struct virtio_net *dev, 1499 struct vhost_virtqueue *vq, 1500 struct rte_mbuf *pkt) 1501 { 1502 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1503 uint16_t nr_descs = 0; 1504 1505 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1506 &nr_descs) < 0)) { 1507 VHOST_LOG_DATA(DEBUG, "(%s) failed to get enough desc from vring\n", 1508 dev->ifname); 1509 return -1; 1510 } 1511 1512 VHOST_LOG_DATA(DEBUG, "(%s) current index %d | end index %d\n", 1513 dev->ifname, vq->last_avail_idx, 1514 vq->last_avail_idx + nr_descs); 1515 1516 vq_inc_last_avail_packed(vq, nr_descs); 1517 1518 return 0; 1519 } 1520 1521 static __rte_noinline uint32_t 1522 virtio_dev_rx_packed(struct virtio_net *dev, 1523 struct vhost_virtqueue *__rte_restrict vq, 1524 struct rte_mbuf **__rte_restrict pkts, 1525 uint32_t count) 1526 { 1527 uint32_t pkt_idx = 0; 1528 1529 do { 1530 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1531 1532 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1533 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1534 &pkts[pkt_idx])) { 1535 pkt_idx += 
PACKED_BATCH_SIZE; 1536 continue; 1537 } 1538 } 1539 1540 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1541 break; 1542 pkt_idx++; 1543 1544 } while (pkt_idx < count); 1545 1546 if (vq->shadow_used_idx) { 1547 do_data_copy_enqueue(dev, vq); 1548 vhost_flush_enqueue_shadow_packed(dev, vq); 1549 } 1550 1551 if (pkt_idx) 1552 vhost_vring_call_packed(dev, vq); 1553 1554 return pkt_idx; 1555 } 1556 1557 static __rte_always_inline uint32_t 1558 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1559 struct rte_mbuf **pkts, uint32_t count) 1560 { 1561 struct vhost_virtqueue *vq; 1562 uint32_t nb_tx = 0; 1563 1564 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 1565 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1566 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 1567 dev->ifname, __func__, queue_id); 1568 return 0; 1569 } 1570 1571 vq = dev->virtqueue[queue_id]; 1572 1573 rte_spinlock_lock(&vq->access_lock); 1574 1575 if (unlikely(!vq->enabled)) 1576 goto out_access_unlock; 1577 1578 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1579 vhost_user_iotlb_rd_lock(vq); 1580 1581 if (unlikely(!vq->access_ok)) 1582 if (unlikely(vring_translate(dev, vq) < 0)) 1583 goto out; 1584 1585 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1586 if (count == 0) 1587 goto out; 1588 1589 if (vq_is_packed(dev)) 1590 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1591 else 1592 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1593 1594 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1595 1596 out: 1597 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1598 vhost_user_iotlb_rd_unlock(vq); 1599 1600 out_access_unlock: 1601 rte_spinlock_unlock(&vq->access_lock); 1602 1603 return nb_tx; 1604 } 1605 1606 uint16_t 1607 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1608 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1609 { 1610 struct virtio_net *dev = get_device(vid); 1611 1612 if (!dev) 1613 return 0; 1614 1615 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1616 VHOST_LOG_DATA(ERR, "(%s) %s: built-in vhost net backend is disabled.\n", 1617 dev->ifname, __func__); 1618 return 0; 1619 } 1620 1621 return virtio_dev_rx(dev, queue_id, pkts, count); 1622 } 1623 1624 static __rte_always_inline uint16_t 1625 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1626 { 1627 struct vhost_async *async = vq->async; 1628 1629 if (async->pkts_idx >= async->pkts_inflight_n) 1630 return async->pkts_idx - async->pkts_inflight_n; 1631 else 1632 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1633 } 1634 1635 static __rte_always_inline void 1636 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1637 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1638 { 1639 size_t elem_size = sizeof(struct vring_used_elem); 1640 1641 if (d_idx + count <= ring_size) { 1642 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1643 } else { 1644 uint16_t size = ring_size - d_idx; 1645 1646 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1647 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1648 } 1649 } 1650 1651 static __rte_always_inline void 1652 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1653 struct vring_used_elem_packed *d_ring, 1654 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1655 { 1656 size_t elem_size = sizeof(struct vring_used_elem_packed); 1657 1658 if (d_idx + 
count <= ring_size) { 1659 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1660 } else { 1661 uint16_t size = ring_size - d_idx; 1662 1663 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1664 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1665 } 1666 } 1667 1668 static __rte_noinline uint32_t 1669 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1670 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, 1671 int16_t dma_id, uint16_t vchan_id) 1672 { 1673 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1674 uint32_t pkt_idx = 0; 1675 uint16_t num_buffers; 1676 uint16_t avail_head; 1677 1678 struct vhost_async *async = vq->async; 1679 struct async_inflight_info *pkts_info = async->pkts_info; 1680 uint32_t pkt_err = 0; 1681 uint16_t n_xfer; 1682 uint16_t slot_idx = 0; 1683 1684 /* 1685 * The ordering between avail index and desc reads need to be enforced. 1686 */ 1687 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1688 1689 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1690 1691 async_iter_reset(async); 1692 1693 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1694 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1695 uint16_t nr_vec = 0; 1696 1697 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1698 &num_buffers, avail_head, &nr_vec) < 0)) { 1699 VHOST_LOG_DATA(DEBUG, "(%s) failed to get enough desc from vring\n", 1700 dev->ifname); 1701 vq->shadow_used_idx -= num_buffers; 1702 break; 1703 } 1704 1705 VHOST_LOG_DATA(DEBUG, "(%s) current index %d | end index %d\n", 1706 dev->ifname, vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1707 1708 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1709 vq->shadow_used_idx -= num_buffers; 1710 break; 1711 } 1712 1713 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1714 pkts_info[slot_idx].descs = num_buffers; 1715 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1716 1717 vq->last_avail_idx += num_buffers; 1718 } 1719 1720 if (unlikely(pkt_idx == 0)) 1721 return 0; 1722 1723 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1724 async->iov_iter, pkt_idx); 1725 1726 pkt_err = pkt_idx - n_xfer; 1727 if (unlikely(pkt_err)) { 1728 uint16_t num_descs = 0; 1729 1730 VHOST_LOG_DATA(DEBUG, "(%s) %s: failed to transfer %u packets for queue %u.\n", 1731 dev->ifname, __func__, pkt_err, queue_id); 1732 1733 /* update number of completed packets */ 1734 pkt_idx = n_xfer; 1735 1736 /* calculate the sum of descriptors to revert */ 1737 while (pkt_err-- > 0) { 1738 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1739 slot_idx--; 1740 } 1741 1742 /* recover shadow used ring and available ring */ 1743 vq->shadow_used_idx -= num_descs; 1744 vq->last_avail_idx -= num_descs; 1745 } 1746 1747 /* keep used descriptors */ 1748 if (likely(vq->shadow_used_idx)) { 1749 uint16_t to = async->desc_idx_split & (vq->size - 1); 1750 1751 store_dma_desc_info_split(vq->shadow_used_split, 1752 async->descs_split, vq->size, 0, to, 1753 vq->shadow_used_idx); 1754 1755 async->desc_idx_split += vq->shadow_used_idx; 1756 1757 async->pkts_idx += pkt_idx; 1758 if (async->pkts_idx >= vq->size) 1759 async->pkts_idx -= vq->size; 1760 1761 async->pkts_inflight_n += pkt_idx; 1762 vq->shadow_used_idx = 0; 1763 } 1764 1765 return pkt_idx; 1766 } 1767 1768 1769 static __rte_always_inline int 1770 vhost_enqueue_async_packed(struct virtio_net *dev, 1771 struct 
vhost_virtqueue *vq, 1772 struct rte_mbuf *pkt, 1773 struct buf_vector *buf_vec, 1774 uint16_t *nr_descs, 1775 uint16_t *nr_buffers) 1776 { 1777 uint16_t nr_vec = 0; 1778 uint16_t avail_idx = vq->last_avail_idx; 1779 uint16_t max_tries, tries = 0; 1780 uint16_t buf_id = 0; 1781 uint32_t len = 0; 1782 uint16_t desc_count = 0; 1783 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1784 uint32_t buffer_len[vq->size]; 1785 uint16_t buffer_buf_id[vq->size]; 1786 uint16_t buffer_desc_count[vq->size]; 1787 1788 if (rxvq_is_mergeable(dev)) 1789 max_tries = vq->size - 1; 1790 else 1791 max_tries = 1; 1792 1793 while (size > 0) { 1794 /* 1795 * if we tried all available ring items, and still 1796 * can't get enough buf, it means something abnormal 1797 * happened. 1798 */ 1799 if (unlikely(++tries > max_tries)) 1800 return -1; 1801 1802 if (unlikely(fill_vec_buf_packed(dev, vq, 1803 avail_idx, &desc_count, 1804 buf_vec, &nr_vec, 1805 &buf_id, &len, 1806 VHOST_ACCESS_RW) < 0)) 1807 return -1; 1808 1809 len = RTE_MIN(len, size); 1810 size -= len; 1811 1812 buffer_len[*nr_buffers] = len; 1813 buffer_buf_id[*nr_buffers] = buf_id; 1814 buffer_desc_count[*nr_buffers] = desc_count; 1815 *nr_buffers += 1; 1816 *nr_descs += desc_count; 1817 avail_idx += desc_count; 1818 if (avail_idx >= vq->size) 1819 avail_idx -= vq->size; 1820 } 1821 1822 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1823 return -1; 1824 1825 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1826 1827 return 0; 1828 } 1829 1830 static __rte_always_inline int16_t 1831 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1832 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1833 { 1834 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1835 1836 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1837 nr_descs, nr_buffers) < 0)) { 1838 VHOST_LOG_DATA(DEBUG, "(%s) failed to get enough desc from vring\n", dev->ifname); 1839 return -1; 1840 } 1841 1842 VHOST_LOG_DATA(DEBUG, "(%s) current index %d | end index %d\n", 1843 dev->ifname, vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1844 1845 return 0; 1846 } 1847 1848 static __rte_always_inline void 1849 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 1850 uint32_t nr_err, uint32_t *pkt_idx) 1851 { 1852 uint16_t descs_err = 0; 1853 uint16_t buffers_err = 0; 1854 struct async_inflight_info *pkts_info = vq->async->pkts_info; 1855 1856 *pkt_idx -= nr_err; 1857 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1858 while (nr_err-- > 0) { 1859 descs_err += pkts_info[slot_idx % vq->size].descs; 1860 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1861 slot_idx--; 1862 } 1863 1864 if (vq->last_avail_idx >= descs_err) { 1865 vq->last_avail_idx -= descs_err; 1866 } else { 1867 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1868 vq->avail_wrap_counter ^= 1; 1869 } 1870 1871 vq->shadow_used_idx -= buffers_err; 1872 } 1873 1874 static __rte_noinline uint32_t 1875 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1876 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, 1877 int16_t dma_id, uint16_t vchan_id) 1878 { 1879 uint32_t pkt_idx = 0; 1880 uint32_t remained = count; 1881 uint16_t n_xfer; 1882 uint16_t num_buffers; 1883 uint16_t num_descs; 1884 1885 struct vhost_async *async = vq->async; 1886 struct async_inflight_info *pkts_info = async->pkts_info; 1887 uint32_t pkt_err = 0; 1888 uint16_t slot_idx = 0; 1889 1890 do { 1891 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1892 1893 num_buffers = 0; 1894 num_descs = 0; 1895 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 1896 &num_descs, &num_buffers) < 0)) 1897 break; 1898 1899 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 1900 1901 pkts_info[slot_idx].descs = num_descs; 1902 pkts_info[slot_idx].nr_buffers = num_buffers; 1903 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1904 1905 pkt_idx++; 1906 remained--; 1907 vq_inc_last_avail_packed(vq, num_descs); 1908 } while (pkt_idx < count); 1909 1910 if (unlikely(pkt_idx == 0)) 1911 return 0; 1912 1913 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1914 async->iov_iter, pkt_idx); 1915 1916 async_iter_reset(async); 1917 1918 pkt_err = pkt_idx - n_xfer; 1919 if (unlikely(pkt_err)) { 1920 VHOST_LOG_DATA(DEBUG, "(%s) %s: failed to transfer %u packets for queue %u.\n", 1921 dev->ifname, __func__, pkt_err, queue_id); 1922 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 1923 } 1924 1925 if (likely(vq->shadow_used_idx)) { 1926 /* keep used descriptors. */ 1927 store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed, 1928 vq->size, 0, async->buffer_idx_packed, 1929 vq->shadow_used_idx); 1930 1931 async->buffer_idx_packed += vq->shadow_used_idx; 1932 if (async->buffer_idx_packed >= vq->size) 1933 async->buffer_idx_packed -= vq->size; 1934 1935 async->pkts_idx += pkt_idx; 1936 if (async->pkts_idx >= vq->size) 1937 async->pkts_idx -= vq->size; 1938 1939 vq->shadow_used_idx = 0; 1940 async->pkts_inflight_n += pkt_idx; 1941 } 1942 1943 return pkt_idx; 1944 } 1945 1946 static __rte_always_inline void 1947 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 1948 { 1949 struct vhost_async *async = vq->async; 1950 uint16_t nr_left = n_descs; 1951 uint16_t nr_copy; 1952 uint16_t to, from; 1953 1954 do { 1955 from = async->last_desc_idx_split & (vq->size - 1); 1956 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 1957 to = vq->last_used_idx & (vq->size - 1); 1958 1959 if (to + nr_copy <= vq->size) { 1960 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1961 nr_copy * sizeof(struct vring_used_elem)); 1962 } else { 1963 uint16_t size = vq->size - to; 1964 1965 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1966 size * sizeof(struct vring_used_elem)); 1967 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 1968 (nr_copy - size) * sizeof(struct vring_used_elem)); 1969 } 1970 1971 async->last_desc_idx_split += nr_copy; 1972 vq->last_used_idx += nr_copy; 1973 nr_left -= nr_copy; 1974 } while (nr_left > 0); 1975 } 1976 1977 static __rte_always_inline void 1978 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 1979 uint16_t n_buffers) 1980 { 1981 struct vhost_async *async = vq->async; 1982 uint16_t from = async->last_buffer_idx_packed; 1983 uint16_t used_idx = vq->last_used_idx; 1984 uint16_t head_idx = vq->last_used_idx; 1985 uint16_t head_flags = 0; 1986 uint16_t i; 1987 1988 /* Split loop in two to save memory barriers */ 1989 for (i = 0; i < n_buffers; i++) { 1990 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 1991 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 1992 1993 used_idx += async->buffers_packed[from].count; 1994 if (used_idx >= vq->size) 1995 used_idx -= vq->size; 1996 1997 from++; 1998 if (from >= vq->size) 1999 from = 0; 2000 } 2001 2002 /* The ordering for storing desc flags needs to be enforced. */ 2003 rte_atomic_thread_fence(__ATOMIC_RELEASE); 2004 2005 from = async->last_buffer_idx_packed; 2006 2007 for (i = 0; i < n_buffers; i++) { 2008 uint16_t flags; 2009 2010 if (async->buffers_packed[from].len) 2011 flags = VRING_DESC_F_WRITE; 2012 else 2013 flags = 0; 2014 2015 if (vq->used_wrap_counter) { 2016 flags |= VRING_DESC_F_USED; 2017 flags |= VRING_DESC_F_AVAIL; 2018 } else { 2019 flags &= ~VRING_DESC_F_USED; 2020 flags &= ~VRING_DESC_F_AVAIL; 2021 } 2022 2023 if (i > 0) { 2024 vq->desc_packed[vq->last_used_idx].flags = flags; 2025 } else { 2026 head_idx = vq->last_used_idx; 2027 head_flags = flags; 2028 } 2029 2030 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2031 2032 from++; 2033 if (from == vq->size) 2034 from = 0; 2035 } 2036 2037 vq->desc_packed[head_idx].flags = head_flags; 2038 async->last_buffer_idx_packed = from; 2039 } 2040 2041 static __rte_always_inline uint16_t 2042 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, 2043 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2044 uint16_t vchan_id) 2045 { 2046 struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; 2047 struct vhost_async *async = vq->async; 2048 struct async_inflight_info *pkts_info = async->pkts_info; 2049 uint16_t nr_cpl_pkts = 0; 2050 uint16_t n_descs = 0, n_buffers = 0; 2051 uint16_t start_idx, from, i; 2052 2053 /* Check completed copies for the given DMA vChannel */ 2054 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2055 2056 start_idx = async_get_first_inflight_pkt_idx(vq); 2057 /** 2058 * Calculate the number of copy completed packets. 2059 * Note that there may be completed packets even if 2060 * no copies are reported done by the given DMA vChannel, 2061 * as it's possible that a virtqueue uses multiple DMA 2062 * vChannels. 
2063 */ 2064 from = start_idx; 2065 while (vq->async->pkts_cmpl_flag[from] && count--) { 2066 vq->async->pkts_cmpl_flag[from] = false; 2067 from++; 2068 if (from >= vq->size) 2069 from -= vq->size; 2070 nr_cpl_pkts++; 2071 } 2072 2073 if (nr_cpl_pkts == 0) 2074 return 0; 2075 2076 for (i = 0; i < nr_cpl_pkts; i++) { 2077 from = (start_idx + i) % vq->size; 2078 /* Only used with packed ring */ 2079 n_buffers += pkts_info[from].nr_buffers; 2080 /* Only used with split ring */ 2081 n_descs += pkts_info[from].descs; 2082 pkts[i] = pkts_info[from].mbuf; 2083 } 2084 2085 async->pkts_inflight_n -= nr_cpl_pkts; 2086 2087 if (likely(vq->enabled && vq->access_ok)) { 2088 if (vq_is_packed(dev)) { 2089 write_back_completed_descs_packed(vq, n_buffers); 2090 vhost_vring_call_packed(dev, vq); 2091 } else { 2092 write_back_completed_descs_split(vq, n_descs); 2093 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE); 2094 vhost_vring_call_split(dev, vq); 2095 } 2096 } else { 2097 if (vq_is_packed(dev)) { 2098 async->last_buffer_idx_packed += n_buffers; 2099 if (async->last_buffer_idx_packed >= vq->size) 2100 async->last_buffer_idx_packed -= vq->size; 2101 } else { 2102 async->last_desc_idx_split += n_descs; 2103 } 2104 } 2105 2106 return nr_cpl_pkts; 2107 } 2108 2109 uint16_t 2110 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2111 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2112 uint16_t vchan_id) 2113 { 2114 struct virtio_net *dev = get_device(vid); 2115 struct vhost_virtqueue *vq; 2116 uint16_t n_pkts_cpl = 0; 2117 2118 if (unlikely(!dev)) 2119 return 0; 2120 2121 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 2122 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2123 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 2124 dev->ifname, __func__, queue_id); 2125 return 0; 2126 } 2127 2128 if (unlikely(!dma_copy_track[dma_id].vchans || 2129 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2130 VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__, 2131 dma_id, vchan_id); 2132 return 0; 2133 } 2134 2135 vq = dev->virtqueue[queue_id]; 2136 2137 if (!rte_spinlock_trylock(&vq->access_lock)) { 2138 VHOST_LOG_DATA(DEBUG, "(%s) %s: virtqueue %u is busy.\n", dev->ifname, __func__, 2139 queue_id); 2140 return 0; 2141 } 2142 2143 if (unlikely(!vq->async)) { 2144 VHOST_LOG_DATA(ERR, "(%s) %s: async not registered for virtqueue %d.\n", 2145 dev->ifname, __func__, queue_id); 2146 goto out; 2147 } 2148 2149 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id); 2150 2151 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2152 vq->stats.inflight_completed += n_pkts_cpl; 2153 2154 out: 2155 rte_spinlock_unlock(&vq->access_lock); 2156 2157 return n_pkts_cpl; 2158 } 2159 2160 uint16_t 2161 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2162 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2163 uint16_t vchan_id) 2164 { 2165 struct virtio_net *dev = get_device(vid); 2166 struct vhost_virtqueue *vq; 2167 uint16_t n_pkts_cpl = 0; 2168 2169 if (!dev) 2170 return 0; 2171 2172 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 2173 if (unlikely(queue_id >= dev->nr_vring)) { 2174 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 2175 dev->ifname, __func__, queue_id); 2176 return 0; 2177 } 2178 2179 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2180 VHOST_LOG_DATA(ERR, "(%s) %s: invalid dma id %d.\n", 2181 dev->ifname, 
__func__, dma_id); 2182 return 0; 2183 } 2184 2185 vq = dev->virtqueue[queue_id]; 2186 2187 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { 2188 VHOST_LOG_DATA(ERR, "(%s) %s() called without access lock taken.\n", 2189 dev->ifname, __func__); 2190 return -1; 2191 } 2192 2193 if (unlikely(!vq->async)) { 2194 VHOST_LOG_DATA(ERR, "(%s) %s: async not registered for queue id %d.\n", 2195 dev->ifname, __func__, queue_id); 2196 return 0; 2197 } 2198 2199 if (unlikely(!dma_copy_track[dma_id].vchans || 2200 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2201 VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__, 2202 dma_id, vchan_id); 2203 return 0; 2204 } 2205 2206 if ((queue_id & 1) == 0) 2207 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, 2208 pkts, count, dma_id, vchan_id); 2209 else { 2210 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2211 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2212 } 2213 2214 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2215 vq->stats.inflight_completed += n_pkts_cpl; 2216 2217 return n_pkts_cpl; 2218 } 2219 2220 uint16_t 2221 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2222 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2223 { 2224 struct virtio_net *dev = get_device(vid); 2225 struct vhost_virtqueue *vq; 2226 uint16_t n_pkts_cpl = 0; 2227 2228 if (!dev) 2229 return 0; 2230 2231 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 2232 if (unlikely(queue_id >= dev->nr_vring)) { 2233 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %u.\n", 2234 dev->ifname, __func__, queue_id); 2235 return 0; 2236 } 2237 2238 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2239 VHOST_LOG_DATA(ERR, "(%s) %s: invalid dma id %d.\n", 2240 dev->ifname, __func__, dma_id); 2241 return 0; 2242 } 2243 2244 vq = dev->virtqueue[queue_id]; 2245 2246 if (!rte_spinlock_trylock(&vq->access_lock)) { 2247 VHOST_LOG_DATA(DEBUG, "(%s) %s: virtqueue %u is busy.\n", 2248 dev->ifname, __func__, queue_id); 2249 return 0; 2250 } 2251 2252 if (unlikely(!vq->async)) { 2253 VHOST_LOG_DATA(ERR, "(%s) %s: async not registered for queue id %u.\n", 2254 dev->ifname, __func__, queue_id); 2255 goto out_access_unlock; 2256 } 2257 2258 if (unlikely(!dma_copy_track[dma_id].vchans || 2259 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2260 VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__, 2261 dma_id, vchan_id); 2262 goto out_access_unlock; 2263 } 2264 2265 if ((queue_id & 1) == 0) 2266 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, 2267 pkts, count, dma_id, vchan_id); 2268 else { 2269 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2270 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2271 } 2272 2273 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2274 vq->stats.inflight_completed += n_pkts_cpl; 2275 2276 out_access_unlock: 2277 rte_spinlock_unlock(&vq->access_lock); 2278 2279 return n_pkts_cpl; 2280 } 2281 2282 static __rte_always_inline uint32_t 2283 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2284 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2285 { 2286 struct vhost_virtqueue *vq; 2287 uint32_t nb_tx = 0; 2288 2289 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 2290 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2291 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 
2292 dev->ifname, __func__, queue_id); 2293 return 0; 2294 } 2295 2296 if (unlikely(!dma_copy_track[dma_id].vchans || 2297 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2298 VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__, 2299 dma_id, vchan_id); 2300 return 0; 2301 } 2302 2303 vq = dev->virtqueue[queue_id]; 2304 2305 rte_spinlock_lock(&vq->access_lock); 2306 2307 if (unlikely(!vq->enabled || !vq->async)) 2308 goto out_access_unlock; 2309 2310 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2311 vhost_user_iotlb_rd_lock(vq); 2312 2313 if (unlikely(!vq->access_ok)) 2314 if (unlikely(vring_translate(dev, vq) < 0)) 2315 goto out; 2316 2317 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2318 if (count == 0) 2319 goto out; 2320 2321 if (vq_is_packed(dev)) 2322 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, 2323 pkts, count, dma_id, vchan_id); 2324 else 2325 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, 2326 pkts, count, dma_id, vchan_id); 2327 2328 vq->stats.inflight_submitted += nb_tx; 2329 2330 out: 2331 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2332 vhost_user_iotlb_rd_unlock(vq); 2333 2334 out_access_unlock: 2335 rte_spinlock_unlock(&vq->access_lock); 2336 2337 return nb_tx; 2338 } 2339 2340 uint16_t 2341 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2342 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2343 uint16_t vchan_id) 2344 { 2345 struct virtio_net *dev = get_device(vid); 2346 2347 if (!dev) 2348 return 0; 2349 2350 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2351 VHOST_LOG_DATA(ERR, "(%s) %s: built-in vhost net backend is disabled.\n", 2352 dev->ifname, __func__); 2353 return 0; 2354 } 2355 2356 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id); 2357 } 2358 2359 static inline bool 2360 virtio_net_with_host_offload(struct virtio_net *dev) 2361 { 2362 if (dev->features & 2363 ((1ULL << VIRTIO_NET_F_CSUM) | 2364 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2365 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2366 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2367 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2368 return true; 2369 2370 return false; 2371 } 2372 2373 static int 2374 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2375 { 2376 struct rte_ipv4_hdr *ipv4_hdr; 2377 struct rte_ipv6_hdr *ipv6_hdr; 2378 struct rte_ether_hdr *eth_hdr; 2379 uint16_t ethertype; 2380 uint16_t data_len = rte_pktmbuf_data_len(m); 2381 2382 if (data_len < sizeof(struct rte_ether_hdr)) 2383 return -EINVAL; 2384 2385 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2386 2387 m->l2_len = sizeof(struct rte_ether_hdr); 2388 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2389 2390 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2391 if (data_len < sizeof(struct rte_ether_hdr) + 2392 sizeof(struct rte_vlan_hdr)) 2393 goto error; 2394 2395 struct rte_vlan_hdr *vlan_hdr = 2396 (struct rte_vlan_hdr *)(eth_hdr + 1); 2397 2398 m->l2_len += sizeof(struct rte_vlan_hdr); 2399 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2400 } 2401 2402 switch (ethertype) { 2403 case RTE_ETHER_TYPE_IPV4: 2404 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2405 goto error; 2406 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2407 m->l2_len); 2408 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2409 if (data_len < m->l2_len + m->l3_len) 2410 goto error; 2411 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2412 *l4_proto = ipv4_hdr->next_proto_id; 2413 break; 2414 case RTE_ETHER_TYPE_IPV6: 2415 if 
(data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2416 goto error; 2417 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2418 m->l2_len); 2419 m->l3_len = sizeof(struct rte_ipv6_hdr); 2420 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2421 *l4_proto = ipv6_hdr->proto; 2422 break; 2423 default: 2424 /* a valid L3 header is needed for further L4 parsing */ 2425 goto error; 2426 } 2427 2428 /* both CSUM and GSO need a valid L4 header */ 2429 switch (*l4_proto) { 2430 case IPPROTO_TCP: 2431 if (data_len < m->l2_len + m->l3_len + 2432 sizeof(struct rte_tcp_hdr)) 2433 goto error; 2434 break; 2435 case IPPROTO_UDP: 2436 if (data_len < m->l2_len + m->l3_len + 2437 sizeof(struct rte_udp_hdr)) 2438 goto error; 2439 break; 2440 case IPPROTO_SCTP: 2441 if (data_len < m->l2_len + m->l3_len + 2442 sizeof(struct rte_sctp_hdr)) 2443 goto error; 2444 break; 2445 default: 2446 goto error; 2447 } 2448 2449 return 0; 2450 2451 error: 2452 m->l2_len = 0; 2453 m->l3_len = 0; 2454 m->ol_flags = 0; 2455 return -EINVAL; 2456 } 2457 2458 static __rte_always_inline void 2459 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2460 struct rte_mbuf *m) 2461 { 2462 uint8_t l4_proto = 0; 2463 struct rte_tcp_hdr *tcp_hdr = NULL; 2464 uint16_t tcp_len; 2465 uint16_t data_len = rte_pktmbuf_data_len(m); 2466 2467 if (parse_headers(m, &l4_proto) < 0) 2468 return; 2469 2470 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2471 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2472 switch (hdr->csum_offset) { 2473 case (offsetof(struct rte_tcp_hdr, cksum)): 2474 if (l4_proto != IPPROTO_TCP) 2475 goto error; 2476 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2477 break; 2478 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2479 if (l4_proto != IPPROTO_UDP) 2480 goto error; 2481 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2482 break; 2483 case (offsetof(struct rte_sctp_hdr, cksum)): 2484 if (l4_proto != IPPROTO_SCTP) 2485 goto error; 2486 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2487 break; 2488 default: 2489 goto error; 2490 } 2491 } else { 2492 goto error; 2493 } 2494 } 2495 2496 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2497 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2498 case VIRTIO_NET_HDR_GSO_TCPV4: 2499 case VIRTIO_NET_HDR_GSO_TCPV6: 2500 if (l4_proto != IPPROTO_TCP) 2501 goto error; 2502 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2503 struct rte_tcp_hdr *, 2504 m->l2_len + m->l3_len); 2505 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2506 if (data_len < m->l2_len + m->l3_len + tcp_len) 2507 goto error; 2508 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2509 m->tso_segsz = hdr->gso_size; 2510 m->l4_len = tcp_len; 2511 break; 2512 case VIRTIO_NET_HDR_GSO_UDP: 2513 if (l4_proto != IPPROTO_UDP) 2514 goto error; 2515 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2516 m->tso_segsz = hdr->gso_size; 2517 m->l4_len = sizeof(struct rte_udp_hdr); 2518 break; 2519 default: 2520 VHOST_LOG_DATA(WARNING, "(%s) unsupported gso type %u.\n", 2521 dev->ifname, hdr->gso_type); 2522 goto error; 2523 } 2524 } 2525 return; 2526 2527 error: 2528 m->l2_len = 0; 2529 m->l3_len = 0; 2530 m->ol_flags = 0; 2531 } 2532 2533 static __rte_always_inline void 2534 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2535 struct rte_mbuf *m, bool legacy_ol_flags) 2536 { 2537 struct rte_net_hdr_lens hdr_lens; 2538 int l4_supported = 0; 2539 uint32_t ptype; 2540 2541 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2542 return; 2543 2544 if (legacy_ol_flags) { 2545 vhost_dequeue_offload_legacy(dev, hdr, m); 
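/*
 * Legacy mode: the virtio-net header has been mapped onto Tx-oriented
 * offload flags (RTE_MBUF_F_TX_*). The non-legacy path that follows this
 * block instead classifies the packet with rte_net_get_ptype() and reports
 * Rx-oriented flags (RTE_MBUF_F_RX_*), falling back to a software checksum
 * for protocols it cannot classify.
 */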
2546 return; 2547 } 2548 2549 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2550 2551 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2552 m->packet_type = ptype; 2553 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2554 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2555 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2556 l4_supported = 1; 2557 2558 /* According to Virtio 1.1 spec, the device only needs to look at 2559 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2560 * This differs from the processing incoming packets path where the 2561 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2562 * device. 2563 * 2564 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2565 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2566 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2567 * 2568 * 5.1.6.2.2 Device Requirements: Packet Transmission 2569 * The device MUST ignore flag bits that it does not recognize. 2570 */ 2571 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2572 uint32_t hdrlen; 2573 2574 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2575 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2576 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2577 } else { 2578 /* Unknown proto or tunnel, do sw cksum. We can assume 2579 * the cksum field is in the first segment since the 2580 * buffers we provided to the host are large enough. 2581 * In case of SCTP, this will be wrong since it's a CRC 2582 * but there's nothing we can do. 2583 */ 2584 uint16_t csum = 0, off; 2585 2586 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2587 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2588 return; 2589 if (likely(csum != 0xffff)) 2590 csum = ~csum; 2591 off = hdr->csum_offset + hdr->csum_start; 2592 if (rte_pktmbuf_data_len(m) >= off + 1) 2593 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2594 } 2595 } 2596 2597 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2598 if (hdr->gso_size == 0) 2599 return; 2600 2601 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2602 case VIRTIO_NET_HDR_GSO_TCPV4: 2603 case VIRTIO_NET_HDR_GSO_TCPV6: 2604 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2605 break; 2606 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2607 m->tso_segsz = hdr->gso_size; 2608 break; 2609 case VIRTIO_NET_HDR_GSO_UDP: 2610 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2611 break; 2612 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2613 m->tso_segsz = hdr->gso_size; 2614 break; 2615 default: 2616 break; 2617 } 2618 } 2619 } 2620 2621 static __rte_noinline void 2622 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2623 struct buf_vector *buf_vec) 2624 { 2625 uint64_t len; 2626 uint64_t remain = sizeof(struct virtio_net_hdr); 2627 uint64_t src; 2628 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2629 2630 while (remain) { 2631 len = RTE_MIN(remain, buf_vec->buf_len); 2632 src = buf_vec->buf_addr; 2633 rte_memcpy((void *)(uintptr_t)dst, 2634 (void *)(uintptr_t)src, len); 2635 2636 remain -= len; 2637 dst += len; 2638 buf_vec++; 2639 } 2640 } 2641 2642 static __rte_always_inline int 2643 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2644 struct buf_vector *buf_vec, uint16_t nr_vec, 2645 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2646 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2647 { 2648 uint32_t buf_avail, buf_offset, buf_len; 2649 uint64_t buf_addr, buf_iova; 2650 uint32_t mbuf_avail, mbuf_offset; 2651 uint32_t cpy_len; 2652 
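/*
 * From here on, desc_to_mbuf() copies one descriptor chain into the mbuf
 * chain headed by 'm': the virtio-net header is read first (reassembled
 * with copy_vnet_hdr_from_desc() if it spans descriptors), then every
 * buffer segment is copied either synchronously via sync_fill_seg() or,
 * when is_async is set, queued as DMA iovecs via async_fill_seg(). In the
 * async case the parsed header is stashed in pkts_info[slot_idx] so the
 * offload flags can be applied once the DMA copies complete.
 */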
struct rte_mbuf *cur = m, *prev = m; 2653 struct virtio_net_hdr tmp_hdr; 2654 struct virtio_net_hdr *hdr = NULL; 2655 /* A counter to avoid desc dead loop chain */ 2656 uint16_t vec_idx = 0; 2657 struct vhost_async *async = vq->async; 2658 struct async_inflight_info *pkts_info; 2659 2660 buf_addr = buf_vec[vec_idx].buf_addr; 2661 buf_iova = buf_vec[vec_idx].buf_iova; 2662 buf_len = buf_vec[vec_idx].buf_len; 2663 2664 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 2665 return -1; 2666 2667 if (virtio_net_with_host_offload(dev)) { 2668 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2669 /* 2670 * No luck, the virtio-net header doesn't fit 2671 * in a contiguous virtual area. 2672 */ 2673 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2674 hdr = &tmp_hdr; 2675 } else { 2676 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); 2677 } 2678 } 2679 2680 /* 2681 * A virtio driver normally uses at least 2 desc buffers 2682 * for Tx: the first for storing the header, and others 2683 * for storing the data. 2684 */ 2685 if (unlikely(buf_len < dev->vhost_hlen)) { 2686 buf_offset = dev->vhost_hlen - buf_len; 2687 vec_idx++; 2688 buf_addr = buf_vec[vec_idx].buf_addr; 2689 buf_iova = buf_vec[vec_idx].buf_iova; 2690 buf_len = buf_vec[vec_idx].buf_len; 2691 buf_avail = buf_len - buf_offset; 2692 } else if (buf_len == dev->vhost_hlen) { 2693 if (unlikely(++vec_idx >= nr_vec)) 2694 goto error; 2695 buf_addr = buf_vec[vec_idx].buf_addr; 2696 buf_iova = buf_vec[vec_idx].buf_iova; 2697 buf_len = buf_vec[vec_idx].buf_len; 2698 2699 buf_offset = 0; 2700 buf_avail = buf_len; 2701 } else { 2702 buf_offset = dev->vhost_hlen; 2703 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; 2704 } 2705 2706 PRINT_PACKET(dev, 2707 (uintptr_t)(buf_addr + buf_offset), 2708 (uint32_t)buf_avail, 0); 2709 2710 mbuf_offset = 0; 2711 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2712 2713 if (is_async) { 2714 pkts_info = async->pkts_info; 2715 if (async_iter_initialize(dev, async)) 2716 return -1; 2717 } 2718 2719 while (1) { 2720 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2721 2722 if (is_async) { 2723 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2724 buf_iova + buf_offset, cpy_len, false) < 0) 2725 goto error; 2726 } else { 2727 sync_fill_seg(dev, vq, cur, mbuf_offset, 2728 buf_addr + buf_offset, 2729 buf_iova + buf_offset, cpy_len, false); 2730 } 2731 2732 mbuf_avail -= cpy_len; 2733 mbuf_offset += cpy_len; 2734 buf_avail -= cpy_len; 2735 buf_offset += cpy_len; 2736 2737 /* This buf reaches to its end, get the next one */ 2738 if (buf_avail == 0) { 2739 if (++vec_idx >= nr_vec) 2740 break; 2741 2742 buf_addr = buf_vec[vec_idx].buf_addr; 2743 buf_iova = buf_vec[vec_idx].buf_iova; 2744 buf_len = buf_vec[vec_idx].buf_len; 2745 2746 buf_offset = 0; 2747 buf_avail = buf_len; 2748 2749 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2750 (uint32_t)buf_avail, 0); 2751 } 2752 2753 /* 2754 * This mbuf reaches to its end, get a new one 2755 * to hold more data. 
2756 */ 2757 if (mbuf_avail == 0) { 2758 cur = rte_pktmbuf_alloc(mbuf_pool); 2759 if (unlikely(cur == NULL)) { 2760 VHOST_LOG_DATA(ERR, "(%s) failed to allocate memory for mbuf.\n", 2761 dev->ifname); 2762 goto error; 2763 } 2764 2765 prev->next = cur; 2766 prev->data_len = mbuf_offset; 2767 m->nb_segs += 1; 2768 m->pkt_len += mbuf_offset; 2769 prev = cur; 2770 2771 mbuf_offset = 0; 2772 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2773 } 2774 } 2775 2776 prev->data_len = mbuf_offset; 2777 m->pkt_len += mbuf_offset; 2778 2779 if (is_async) { 2780 async_iter_finalize(async); 2781 if (hdr) 2782 pkts_info[slot_idx].nethdr = *hdr; 2783 } else { 2784 if (hdr) 2785 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 2786 } 2787 2788 return 0; 2789 error: 2790 if (is_async) 2791 async_iter_cancel(async); 2792 2793 return -1; 2794 } 2795 2796 static void 2797 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2798 { 2799 rte_free(opaque); 2800 } 2801 2802 static int 2803 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 2804 { 2805 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2806 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2807 uint16_t buf_len; 2808 rte_iova_t iova; 2809 void *buf; 2810 2811 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2812 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2813 2814 if (unlikely(total_len > UINT16_MAX)) 2815 return -ENOSPC; 2816 2817 buf_len = total_len; 2818 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2819 if (unlikely(buf == NULL)) 2820 return -ENOMEM; 2821 2822 /* Initialize shinfo */ 2823 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2824 virtio_dev_extbuf_free, buf); 2825 if (unlikely(shinfo == NULL)) { 2826 rte_free(buf); 2827 VHOST_LOG_DATA(ERR, "(%s) failed to init shinfo\n", dev->ifname); 2828 return -1; 2829 } 2830 2831 iova = rte_malloc_virt2iova(buf); 2832 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2833 rte_pktmbuf_reset_headroom(pkt); 2834 2835 return 0; 2836 } 2837 2838 /* 2839 * Prepare a host supported pktmbuf. 2840 */ 2841 static __rte_always_inline int 2842 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2843 uint32_t data_len) 2844 { 2845 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2846 return 0; 2847 2848 /* attach an external buffer if supported */ 2849 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 2850 return 0; 2851 2852 /* check if chained buffers are allowed */ 2853 if (!dev->linearbuf) 2854 return 0; 2855 2856 return -1; 2857 } 2858 2859 __rte_always_inline 2860 static uint16_t 2861 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2862 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2863 bool legacy_ol_flags) 2864 { 2865 uint16_t i; 2866 uint16_t avail_entries; 2867 uint16_t dropped = 0; 2868 static bool allocerr_warned; 2869 2870 /* 2871 * The ordering between avail index and 2872 * desc reads needs to be enforced. 
2873 */ 2874 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2875 vq->last_avail_idx; 2876 if (avail_entries == 0) 2877 return 0; 2878 2879 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2880 2881 VHOST_LOG_DATA(DEBUG, "(%s) %s\n", dev->ifname, __func__); 2882 2883 count = RTE_MIN(count, MAX_PKT_BURST); 2884 count = RTE_MIN(count, avail_entries); 2885 VHOST_LOG_DATA(DEBUG, "(%s) about to dequeue %u buffers\n", 2886 dev->ifname, count); 2887 2888 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2889 return 0; 2890 2891 for (i = 0; i < count; i++) { 2892 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2893 uint16_t head_idx; 2894 uint32_t buf_len; 2895 uint16_t nr_vec = 0; 2896 int err; 2897 2898 if (unlikely(fill_vec_buf_split(dev, vq, 2899 vq->last_avail_idx + i, 2900 &nr_vec, buf_vec, 2901 &head_idx, &buf_len, 2902 VHOST_ACCESS_RO) < 0)) 2903 break; 2904 2905 update_shadow_used_ring_split(vq, head_idx, 0); 2906 2907 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2908 if (unlikely(err)) { 2909 /* 2910 * mbuf allocation fails for jumbo packets when external 2911 * buffer allocation is not allowed and linear buffer 2912 * is required. Drop this packet. 2913 */ 2914 if (!allocerr_warned) { 2915 VHOST_LOG_DATA(ERR, "(%s) failed mbuf alloc of size %d from %s.\n", 2916 dev->ifname, buf_len, mbuf_pool->name); 2917 allocerr_warned = true; 2918 } 2919 dropped += 1; 2920 i++; 2921 break; 2922 } 2923 2924 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2925 mbuf_pool, legacy_ol_flags, 0, false); 2926 if (unlikely(err)) { 2927 if (!allocerr_warned) { 2928 VHOST_LOG_DATA(ERR, "(%s) failed to copy desc to mbuf.\n", 2929 dev->ifname); 2930 allocerr_warned = true; 2931 } 2932 dropped += 1; 2933 i++; 2934 break; 2935 } 2936 2937 } 2938 2939 if (dropped) 2940 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2941 2942 vq->last_avail_idx += i; 2943 2944 do_data_copy_dequeue(vq); 2945 if (unlikely(i < count)) 2946 vq->shadow_used_idx = i; 2947 if (likely(vq->shadow_used_idx)) { 2948 flush_shadow_used_ring_split(dev, vq); 2949 vhost_vring_call_split(dev, vq); 2950 } 2951 2952 return (i - dropped); 2953 } 2954 2955 __rte_noinline 2956 static uint16_t 2957 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2958 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2959 struct rte_mbuf **pkts, uint16_t count) 2960 { 2961 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2962 } 2963 2964 __rte_noinline 2965 static uint16_t 2966 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2967 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2968 struct rte_mbuf **pkts, uint16_t count) 2969 { 2970 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2971 } 2972 2973 static __rte_always_inline int 2974 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2975 struct vhost_virtqueue *vq, 2976 struct rte_mbuf **pkts, 2977 uint16_t avail_idx, 2978 uintptr_t *desc_addrs, 2979 uint16_t *ids) 2980 { 2981 bool wrap = vq->avail_wrap_counter; 2982 struct vring_packed_desc *descs = vq->desc_packed; 2983 uint64_t lens[PACKED_BATCH_SIZE]; 2984 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2985 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2986 uint16_t flags, i; 2987 2988 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2989 return -1; 2990 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2991 return -1; 2992 2993 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2994 flags = descs[avail_idx + i].flags; 2995 if 
(unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2996 (wrap == !!(flags & VRING_DESC_F_USED)) || 2997 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2998 return -1; 2999 } 3000 3001 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3002 3003 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3004 lens[i] = descs[avail_idx + i].len; 3005 3006 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3007 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3008 descs[avail_idx + i].addr, 3009 &lens[i], VHOST_ACCESS_RW); 3010 } 3011 3012 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3013 if (unlikely(!desc_addrs[i])) 3014 return -1; 3015 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3016 return -1; 3017 } 3018 3019 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3020 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3021 goto err; 3022 } 3023 3024 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3025 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3026 3027 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3028 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3029 goto err; 3030 } 3031 3032 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3033 pkts[i]->pkt_len = lens[i] - buf_offset; 3034 pkts[i]->data_len = pkts[i]->pkt_len; 3035 ids[i] = descs[avail_idx + i].id; 3036 } 3037 3038 return 0; 3039 3040 err: 3041 return -1; 3042 } 3043 3044 static __rte_always_inline int 3045 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3046 struct vhost_virtqueue *vq, 3047 struct rte_mbuf **pkts, 3048 bool legacy_ol_flags) 3049 { 3050 uint16_t avail_idx = vq->last_avail_idx; 3051 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3052 struct virtio_net_hdr *hdr; 3053 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3054 uint16_t ids[PACKED_BATCH_SIZE]; 3055 uint16_t i; 3056 3057 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3058 desc_addrs, ids)) 3059 return -1; 3060 3061 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3062 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3063 3064 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3065 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3066 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3067 pkts[i]->pkt_len); 3068 3069 if (virtio_net_with_host_offload(dev)) { 3070 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3071 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3072 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3073 } 3074 } 3075 3076 if (virtio_net_is_inorder(dev)) 3077 vhost_shadow_dequeue_batch_packed_inorder(vq, 3078 ids[PACKED_BATCH_SIZE - 1]); 3079 else 3080 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3081 3082 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3083 3084 return 0; 3085 } 3086 3087 static __rte_always_inline int 3088 vhost_dequeue_single_packed(struct virtio_net *dev, 3089 struct vhost_virtqueue *vq, 3090 struct rte_mempool *mbuf_pool, 3091 struct rte_mbuf *pkts, 3092 uint16_t *buf_id, 3093 uint16_t *desc_count, 3094 bool legacy_ol_flags) 3095 { 3096 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3097 uint32_t buf_len; 3098 uint16_t nr_vec = 0; 3099 int err; 3100 static bool allocerr_warned; 3101 3102 if (unlikely(fill_vec_buf_packed(dev, vq, 3103 vq->last_avail_idx, desc_count, 3104 buf_vec, &nr_vec, 3105 buf_id, &buf_len, 3106 VHOST_ACCESS_RO) < 0)) 3107 return -1; 3108 3109 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3110 if (!allocerr_warned) { 3111 VHOST_LOG_DATA(ERR, "(%s) failed mbuf alloc of size %d from %s.\n", 3112 dev->ifname, buf_len, 
mbuf_pool->name); 3113 allocerr_warned = true; 3114 } 3115 return -1; 3116 } 3117 3118 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3119 mbuf_pool, legacy_ol_flags, 0, false); 3120 if (unlikely(err)) { 3121 if (!allocerr_warned) { 3122 VHOST_LOG_DATA(ERR, "(%s) failed to copy desc to mbuf.\n", 3123 dev->ifname); 3124 allocerr_warned = true; 3125 } 3126 return -1; 3127 } 3128 3129 return 0; 3130 } 3131 3132 static __rte_always_inline int 3133 virtio_dev_tx_single_packed(struct virtio_net *dev, 3134 struct vhost_virtqueue *vq, 3135 struct rte_mempool *mbuf_pool, 3136 struct rte_mbuf *pkts, 3137 bool legacy_ol_flags) 3138 { 3139 3140 uint16_t buf_id, desc_count = 0; 3141 int ret; 3142 3143 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3144 &desc_count, legacy_ol_flags); 3145 3146 if (likely(desc_count > 0)) { 3147 if (virtio_net_is_inorder(dev)) 3148 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3149 desc_count); 3150 else 3151 vhost_shadow_dequeue_single_packed(vq, buf_id, 3152 desc_count); 3153 3154 vq_inc_last_avail_packed(vq, desc_count); 3155 } 3156 3157 return ret; 3158 } 3159 3160 __rte_always_inline 3161 static uint16_t 3162 virtio_dev_tx_packed(struct virtio_net *dev, 3163 struct vhost_virtqueue *__rte_restrict vq, 3164 struct rte_mempool *mbuf_pool, 3165 struct rte_mbuf **__rte_restrict pkts, 3166 uint32_t count, 3167 bool legacy_ol_flags) 3168 { 3169 uint32_t pkt_idx = 0; 3170 3171 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3172 return 0; 3173 3174 do { 3175 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3176 3177 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3178 if (!virtio_dev_tx_batch_packed(dev, vq, 3179 &pkts[pkt_idx], 3180 legacy_ol_flags)) { 3181 pkt_idx += PACKED_BATCH_SIZE; 3182 continue; 3183 } 3184 } 3185 3186 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3187 pkts[pkt_idx], 3188 legacy_ol_flags)) 3189 break; 3190 pkt_idx++; 3191 } while (pkt_idx < count); 3192 3193 if (pkt_idx != count) 3194 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3195 3196 if (vq->shadow_used_idx) { 3197 do_data_copy_dequeue(vq); 3198 3199 vhost_flush_dequeue_shadow_packed(dev, vq); 3200 vhost_vring_call_packed(dev, vq); 3201 } 3202 3203 return pkt_idx; 3204 } 3205 3206 __rte_noinline 3207 static uint16_t 3208 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3209 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3210 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3211 { 3212 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3213 } 3214 3215 __rte_noinline 3216 static uint16_t 3217 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3218 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3219 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3220 { 3221 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3222 } 3223 3224 uint16_t 3225 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3226 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3227 { 3228 struct virtio_net *dev; 3229 struct rte_mbuf *rarp_mbuf = NULL; 3230 struct vhost_virtqueue *vq; 3231 int16_t success = 1; 3232 3233 dev = get_device(vid); 3234 if (!dev) 3235 return 0; 3236 3237 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3238 VHOST_LOG_DATA(ERR, "(%s) %s: built-in vhost net backend is disabled.\n", 3239 dev->ifname, __func__); 3240 return 0; 3241 } 3242 3243 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 
3244 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 3245 dev->ifname, __func__, queue_id); 3246 return 0; 3247 } 3248 3249 vq = dev->virtqueue[queue_id]; 3250 3251 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) 3252 return 0; 3253 3254 if (unlikely(!vq->enabled)) { 3255 count = 0; 3256 goto out_access_unlock; 3257 } 3258 3259 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3260 vhost_user_iotlb_rd_lock(vq); 3261 3262 if (unlikely(!vq->access_ok)) 3263 if (unlikely(vring_translate(dev, vq) < 0)) { 3264 count = 0; 3265 goto out; 3266 } 3267 3268 /* 3269 * Construct a RARP broadcast packet and inject it into the "pkts" 3270 * array, to make it look like the guest actually sent such a packet. 3271 * 3272 * Check user_send_rarp() for more information. 3273 * 3274 * broadcast_rarp shares a cacheline in the virtio_net structure 3275 * with some fields that are accessed during enqueue, and 3276 * __atomic_compare_exchange_n causes a write when it performs the 3277 * compare and exchange. This could result in false sharing between 3278 * enqueue and dequeue. 3279 * 3280 * Prevent unnecessary false sharing by reading broadcast_rarp first 3281 * and only performing compare and exchange if the read indicates it 3282 * is likely to be set. 3283 */ 3284 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && 3285 __atomic_compare_exchange_n(&dev->broadcast_rarp, 3286 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { 3287 3288 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 3289 if (rarp_mbuf == NULL) { 3290 VHOST_LOG_DATA(ERR, "(%s) failed to make RARP packet.\n", dev->ifname); 3291 count = 0; 3292 goto out; 3293 } 3294 /* 3295 * Inject it at the head of the "pkts" array, so that the switch's MAC 3296 * learning table gets updated first.
3297 */ 3298 pkts[0] = rarp_mbuf; 3299 vhost_queue_stats_update(dev, vq, pkts, 1); 3300 pkts++; 3301 count -= 1; 3302 } 3303 3304 if (vq_is_packed(dev)) { 3305 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3306 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3307 else 3308 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3309 } else { 3310 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3311 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3312 else 3313 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3314 } 3315 3316 vhost_queue_stats_update(dev, vq, pkts, count); 3317 3318 out: 3319 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3320 vhost_user_iotlb_rd_unlock(vq); 3321 3322 out_access_unlock: 3323 rte_spinlock_unlock(&vq->access_lock); 3324 3325 if (unlikely(rarp_mbuf != NULL)) 3326 count += 1; 3327 3328 return count; 3329 } 3330 3331 static __rte_always_inline uint16_t 3332 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3333 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3334 uint16_t vchan_id, bool legacy_ol_flags) 3335 { 3336 uint16_t start_idx, from, i; 3337 uint16_t nr_cpl_pkts = 0; 3338 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3339 3340 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3341 3342 start_idx = async_get_first_inflight_pkt_idx(vq); 3343 3344 from = start_idx; 3345 while (vq->async->pkts_cmpl_flag[from] && count--) { 3346 vq->async->pkts_cmpl_flag[from] = false; 3347 from = (from + 1) % vq->size; 3348 nr_cpl_pkts++; 3349 } 3350 3351 if (nr_cpl_pkts == 0) 3352 return 0; 3353 3354 for (i = 0; i < nr_cpl_pkts; i++) { 3355 from = (start_idx + i) % vq->size; 3356 pkts[i] = pkts_info[from].mbuf; 3357 3358 if (virtio_net_with_host_offload(dev)) 3359 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i], 3360 legacy_ol_flags); 3361 } 3362 3363 /* write back completed descs to used ring and update used idx */ 3364 if (vq_is_packed(dev)) { 3365 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3366 vhost_vring_call_packed(dev, vq); 3367 } else { 3368 write_back_completed_descs_split(vq, nr_cpl_pkts); 3369 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE); 3370 vhost_vring_call_split(dev, vq); 3371 } 3372 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3373 3374 return nr_cpl_pkts; 3375 } 3376 3377 static __rte_always_inline uint16_t 3378 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3379 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3380 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3381 { 3382 static bool allocerr_warned; 3383 bool dropped = false; 3384 uint16_t avail_entries; 3385 uint16_t pkt_idx, slot_idx = 0; 3386 uint16_t nr_done_pkts = 0; 3387 uint16_t pkt_err = 0; 3388 uint16_t n_xfer; 3389 struct vhost_async *async = vq->async; 3390 struct async_inflight_info *pkts_info = async->pkts_info; 3391 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3392 uint16_t pkts_size = count; 3393 3394 /** 3395 * The ordering between avail index and 3396 * desc reads needs to be enforced. 
3397 */ 3398 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3399 vq->last_avail_idx; 3400 if (avail_entries == 0) 3401 goto out; 3402 3403 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3404 3405 async_iter_reset(async); 3406 3407 count = RTE_MIN(count, MAX_PKT_BURST); 3408 count = RTE_MIN(count, avail_entries); 3409 VHOST_LOG_DATA(DEBUG, "(%s) about to dequeue %u buffers\n", 3410 dev->ifname, count); 3411 3412 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3413 goto out; 3414 3415 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3416 uint16_t head_idx = 0; 3417 uint16_t nr_vec = 0; 3418 uint16_t to; 3419 uint32_t buf_len; 3420 int err; 3421 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3422 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3423 3424 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3425 &nr_vec, buf_vec, 3426 &head_idx, &buf_len, 3427 VHOST_ACCESS_RO) < 0)) { 3428 dropped = true; 3429 break; 3430 } 3431 3432 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3433 if (unlikely(err)) { 3434 /** 3435 * mbuf allocation fails for jumbo packets when external 3436 * buffer allocation is not allowed and linear buffer 3437 * is required. Drop this packet. 3438 */ 3439 if (!allocerr_warned) { 3440 VHOST_LOG_DATA(ERR, 3441 "(%s) %s: Failed mbuf alloc of size %d from %s\n", 3442 dev->ifname, __func__, buf_len, mbuf_pool->name); 3443 allocerr_warned = true; 3444 } 3445 dropped = true; 3446 break; 3447 } 3448 3449 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3450 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3451 legacy_ol_flags, slot_idx, true); 3452 if (unlikely(err)) { 3453 if (!allocerr_warned) { 3454 VHOST_LOG_DATA(ERR, 3455 "(%s) %s: Failed to offload copies to async channel.\n", 3456 dev->ifname, __func__); 3457 allocerr_warned = true; 3458 } 3459 dropped = true; 3460 break; 3461 } 3462 3463 pkts_info[slot_idx].mbuf = pkt; 3464 3465 /* store used descs */ 3466 to = async->desc_idx_split & (vq->size - 1); 3467 async->descs_split[to].id = head_idx; 3468 async->descs_split[to].len = 0; 3469 async->desc_idx_split++; 3470 3471 vq->last_avail_idx++; 3472 } 3473 3474 if (unlikely(dropped)) 3475 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3476 3477 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3478 async->iov_iter, pkt_idx); 3479 3480 async->pkts_inflight_n += n_xfer; 3481 3482 pkt_err = pkt_idx - n_xfer; 3483 if (unlikely(pkt_err)) { 3484 VHOST_LOG_DATA(DEBUG, "(%s) %s: failed to transfer data.\n", 3485 dev->ifname, __func__); 3486 3487 pkt_idx = n_xfer; 3488 /* recover available ring */ 3489 vq->last_avail_idx -= pkt_err; 3490 3491 /** 3492 * recover async channel copy related structures and free pktmbufs 3493 * for error pkts. 3494 */ 3495 async->desc_idx_split -= pkt_err; 3496 while (pkt_err-- > 0) { 3497 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3498 slot_idx--; 3499 } 3500 } 3501 3502 async->pkts_idx += pkt_idx; 3503 if (async->pkts_idx >= vq->size) 3504 async->pkts_idx -= vq->size; 3505 3506 out: 3507 /* DMA device may serve other queues, unconditionally check completed. 
*/ 3508 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3509 dma_id, vchan_id, legacy_ol_flags); 3510 3511 return nr_done_pkts; 3512 } 3513 3514 __rte_noinline 3515 static uint16_t 3516 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3517 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3518 struct rte_mbuf **pkts, uint16_t count, 3519 int16_t dma_id, uint16_t vchan_id) 3520 { 3521 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3522 pkts, count, dma_id, vchan_id, true); 3523 } 3524 3525 __rte_noinline 3526 static uint16_t 3527 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3528 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3529 struct rte_mbuf **pkts, uint16_t count, 3530 int16_t dma_id, uint16_t vchan_id) 3531 { 3532 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3533 pkts, count, dma_id, vchan_id, false); 3534 } 3535 3536 static __rte_always_inline void 3537 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t buf_id) 3538 { 3539 struct vhost_async *async = vq->async; 3540 uint16_t idx = async->buffer_idx_packed; 3541 3542 async->buffers_packed[idx].id = buf_id; 3543 async->buffers_packed[idx].len = 0; 3544 async->buffers_packed[idx].count = 1; 3545 3546 async->buffer_idx_packed++; 3547 if (async->buffer_idx_packed >= vq->size) 3548 async->buffer_idx_packed -= vq->size; 3549 3550 } 3551 3552 static __rte_always_inline int 3553 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3554 struct vhost_virtqueue *vq, 3555 struct rte_mempool *mbuf_pool, 3556 struct rte_mbuf *pkts, 3557 uint16_t slot_idx, 3558 bool legacy_ol_flags) 3559 { 3560 int err; 3561 uint16_t buf_id, desc_count = 0; 3562 uint16_t nr_vec = 0; 3563 uint32_t buf_len; 3564 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3565 static bool allocerr_warned; 3566 3567 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3568 buf_vec, &nr_vec, &buf_id, &buf_len, 3569 VHOST_ACCESS_RO) < 0)) 3570 return -1; 3571 3572 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3573 if (!allocerr_warned) { 3574 VHOST_LOG_DATA(ERR, "(%s) Failed mbuf alloc of size %d from %s.\n", 3575 dev->ifname, buf_len, mbuf_pool->name); 3576 3577 allocerr_warned = true; 3578 } 3579 return -1; 3580 } 3581 3582 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3583 legacy_ol_flags, slot_idx, true); 3584 if (unlikely(err)) { 3585 rte_pktmbuf_free(pkts); 3586 if (!allocerr_warned) { 3587 VHOST_LOG_DATA(ERR, "(%s) Failed to copy desc to mbuf.\n", dev->ifname); 3588 allocerr_warned = true; 3589 } 3590 return -1; 3591 } 3592 3593 /* update async shadow packed ring */ 3594 vhost_async_shadow_dequeue_single_packed(vq, buf_id); 3595 3596 return err; 3597 } 3598 3599 static __rte_always_inline uint16_t 3600 virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3601 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3602 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3603 { 3604 uint16_t pkt_idx; 3605 uint16_t slot_idx = 0; 3606 uint16_t nr_done_pkts = 0; 3607 uint16_t pkt_err = 0; 3608 uint32_t n_xfer; 3609 struct vhost_async *async = vq->async; 3610 struct async_inflight_info *pkts_info = async->pkts_info; 3611 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3612 3613 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); 3614 3615 async_iter_reset(async); 3616 3617 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3618 goto out;
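/*
 * Per-packet loop: reserve a tracking slot, build the DMA copy iovec for
 * one packed descriptor chain via virtio_dev_tx_async_single_packed(),
 * record the target mbuf in pkts_info[] and advance the avail index.
 * All gathered copies are then submitted to the DMA vChannel in a single
 * vhost_async_dma_transfer() call; if only part of them could be enqueued,
 * the shadow ring, the tracking slots and the avail index are rolled back
 * for the failed tail below.
 */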
3619 3620 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3621 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3622 3623 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3624 3625 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 3626 if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt, 3627 slot_idx, legacy_ol_flags))) { 3628 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3629 break; 3630 } 3631 3632 pkts_info[slot_idx].mbuf = pkt; 3633 3634 vq_inc_last_avail_packed(vq, 1); 3635 3636 } 3637 3638 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3639 async->iov_iter, pkt_idx); 3640 3641 async->pkts_inflight_n += n_xfer; 3642 3643 pkt_err = pkt_idx - n_xfer; 3644 3645 if (unlikely(pkt_err)) { 3646 pkt_idx -= pkt_err; 3647 3648 /** 3649 * recover DMA-copy related structures and free pktmbuf for DMA-error pkts. 3650 */ 3651 if (async->buffer_idx_packed >= pkt_err) 3652 async->buffer_idx_packed -= pkt_err; 3653 else 3654 async->buffer_idx_packed += vq->size - pkt_err; 3655 3656 while (pkt_err-- > 0) { 3657 rte_pktmbuf_free(pkts_info[slot_idx % vq->size].mbuf); 3658 slot_idx--; 3659 } 3660 3661 /* recover available ring */ 3662 if (vq->last_avail_idx >= pkt_err) { 3663 vq->last_avail_idx -= pkt_err; 3664 } else { 3665 vq->last_avail_idx += vq->size - pkt_err; 3666 vq->avail_wrap_counter ^= 1; 3667 } 3668 } 3669 3670 async->pkts_idx += pkt_idx; 3671 if (async->pkts_idx >= vq->size) 3672 async->pkts_idx -= vq->size; 3673 3674 out: 3675 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count, 3676 dma_id, vchan_id, legacy_ol_flags); 3677 3678 return nr_done_pkts; 3679 } 3680 3681 __rte_noinline 3682 static uint16_t 3683 virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, 3684 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3685 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3686 { 3687 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3688 pkts, count, dma_id, vchan_id, true); 3689 } 3690 3691 __rte_noinline 3692 static uint16_t 3693 virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, 3694 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3695 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3696 { 3697 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3698 pkts, count, dma_id, vchan_id, false); 3699 } 3700 3701 uint16_t 3702 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, 3703 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3704 int *nr_inflight, int16_t dma_id, uint16_t vchan_id) 3705 { 3706 struct virtio_net *dev; 3707 struct rte_mbuf *rarp_mbuf = NULL; 3708 struct vhost_virtqueue *vq; 3709 int16_t success = 1; 3710 3711 dev = get_device(vid); 3712 if (!dev || !nr_inflight) 3713 return 0; 3714 3715 *nr_inflight = -1; 3716 3717 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3718 VHOST_LOG_DATA(ERR, "(%s) %s: built-in vhost net backend is disabled.\n", 3719 dev->ifname, __func__); 3720 return 0; 3721 } 3722 3723 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3724 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n", 3725 dev->ifname, __func__, queue_id); 3726 return 0; 3727 } 3728 3729 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 3730 VHOST_LOG_DATA(ERR, "(%s) %s: invalid dma id %d.\n", 3731 dev->ifname, __func__, dma_id); 3732 return 0; 3733 } 3734 3735 if (unlikely(!dma_copy_track[dma_id].vchans || 3736 
!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 3737 VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__, 3738 dma_id, vchan_id); 3739 return 0; 3740 } 3741 3742 vq = dev->virtqueue[queue_id]; 3743 3744 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) 3745 return 0; 3746 3747 if (unlikely(vq->enabled == 0)) { 3748 count = 0; 3749 goto out_access_unlock; 3750 } 3751 3752 if (unlikely(!vq->async)) { 3753 VHOST_LOG_DATA(ERR, "(%s) %s: async not registered for queue id %d.\n", 3754 dev->ifname, __func__, queue_id); 3755 count = 0; 3756 goto out_access_unlock; 3757 } 3758 3759 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3760 vhost_user_iotlb_rd_lock(vq); 3761 3762 if (unlikely(vq->access_ok == 0)) 3763 if (unlikely(vring_translate(dev, vq) < 0)) { 3764 count = 0; 3765 goto out; 3766 } 3767 3768 /* 3769 * Construct a RARP broadcast packet and inject it into the "pkts" 3770 * array, to make it look like the guest actually sent such a packet. 3771 * 3772 * Check user_send_rarp() for more information. 3773 * 3774 * broadcast_rarp shares a cacheline in the virtio_net structure 3775 * with some fields that are accessed during enqueue, and 3776 * __atomic_compare_exchange_n causes a write when it performs the 3777 * compare and exchange. This could result in false sharing between 3778 * enqueue and dequeue. 3779 * 3780 * Prevent unnecessary false sharing by reading broadcast_rarp first 3781 * and only performing compare and exchange if the read indicates it 3782 * is likely to be set. 3783 */ 3784 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && 3785 __atomic_compare_exchange_n(&dev->broadcast_rarp, 3786 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { 3787 3788 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 3789 if (rarp_mbuf == NULL) { 3790 VHOST_LOG_DATA(ERR, "(%s) failed to make RARP packet.\n", dev->ifname); 3791 count = 0; 3792 goto out; 3793 } 3794 /* 3795 * Inject it at the head of the "pkts" array, so that the switch's MAC 3796 * learning table gets updated first. 3797 */ 3798 pkts[0] = rarp_mbuf; 3799 vhost_queue_stats_update(dev, vq, pkts, 1); 3800 pkts++; 3801 count -= 1; 3802 } 3803 3804 if (vq_is_packed(dev)) { 3805 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3806 count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool, 3807 pkts, count, dma_id, vchan_id); 3808 else 3809 count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool, 3810 pkts, count, dma_id, vchan_id); 3811 } else { 3812 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3813 count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool, 3814 pkts, count, dma_id, vchan_id); 3815 else 3816 count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool, 3817 pkts, count, dma_id, vchan_id); 3818 } 3819 3820 *nr_inflight = vq->async->pkts_inflight_n; 3821 vhost_queue_stats_update(dev, vq, pkts, count); 3822 3823 out: 3824 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3825 vhost_user_iotlb_rd_unlock(vq); 3826 3827 out_access_unlock: 3828 rte_spinlock_unlock(&vq->access_lock); 3829 3830 if (unlikely(rarp_mbuf != NULL)) 3831 count += 1; 3832 3833 return count; 3834 } 3835
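/*
 * Illustrative usage sketch (comment only, not part of the library): one
 * possible way an application could drive the async data path exported by
 * this file, assuming it has already attached a DMA vChannel and registered
 * async channels on the virtqueues. 'vid', 'rxq' (an even, enqueue-side
 * virtqueue index), 'txq' (an odd, dequeue-side index), 'mbuf_pool',
 * 'dma_id' and 'vchan_id' are placeholders.
 *
 *	struct rte_mbuf *pkts[MAX_PKT_BURST];
 *	int nr_inflight;
 *	uint16_t n;
 *
 *	// Offload enqueue copies to the DMA engine; the mbufs must stay
 *	// valid until their completion is reported.
 *	n = rte_vhost_submit_enqueue_burst(vid, rxq, pkts, MAX_PKT_BURST,
 *			dma_id, vchan_id);
 *
 *	// Later: reap completed enqueues, after which the mbufs can be freed.
 *	n = rte_vhost_poll_enqueue_completed(vid, rxq, pkts, MAX_PKT_BURST,
 *			dma_id, vchan_id);
 *	rte_pktmbuf_free_bulk(pkts, n);
 *
 *	// Dequeue guest-transmitted packets through the same DMA vChannel.
 *	n = rte_vhost_async_try_dequeue_burst(vid, txq, mbuf_pool, pkts,
 *			MAX_PKT_BURST, &nr_inflight, dma_id, vchan_id);
 */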