/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_net.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_dmadev.h>
#include <rte_vhost.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_vhost_async.h>

#include "iotlb.h"
#include "vhost.h"

#define MAX_BATCH_LEN 256

static __rte_always_inline uint16_t
async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
		uint16_t vchan_id, bool legacy_ol_flags);

/* DMA device copy operation tracking array. */
struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];

static __rte_always_inline bool
rxvq_is_mergeable(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
}

static __rte_always_inline bool
virtio_net_is_inorder(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
}

static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}

/*
 * This function must be called with virtqueue's access_lock taken.
 */
static inline void
vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtqueue_stats *stats = &vq->stats;
	int i;

	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
		return;

	for (i = 0; i < count; i++) {
		struct rte_ether_addr *ea;
		struct rte_mbuf *pkt = pkts[i];
		uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt);

		stats->packets++;
		stats->bytes += pkt_len;

		if (pkt_len == 64) {
			stats->size_bins[1]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			uint32_t bin;

			/* count zeros, and offset into correct bin */
			bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
			stats->size_bins[bin]++;
		} else {
			if (pkt_len < 64)
				stats->size_bins[0]++;
			else if (pkt_len < 1519)
				stats->size_bins[6]++;
			else
				stats->size_bins[7]++;
		}

		ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *);
		if (rte_is_multicast_ether_addr(ea)) {
			if (rte_is_broadcast_ether_addr(ea))
				stats->broadcast++;
			else
				stats->multicast++;
		}
	}
}

static __rte_always_inline int64_t
vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
		int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx,
		struct vhost_iov_iter *pkt)
{
	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
	uint16_t ring_mask = dma_info->ring_mask;
	static bool vhost_async_dma_copy_log;

	struct vhost_iovec *iov = pkt->iov;
	int copy_idx = 0;
	uint32_t nr_segs = pkt->nr_segs;
	uint16_t i;

	if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs)
		return -1;

	for (i = 0; i < nr_segs; i++) {
		copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,
				(rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);
		/**
		 * Since all memory is pinned and the DMA vChannel
		 * ring has enough space, failure should be a
		 * rare case.
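		 * (rte_dma_copy() returns the non-negative index of the
		 * enqueued copy on success and a negative value on failure,
		 * which is what the copy_idx < 0 check below relies on.)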
		 * If failure happens, it means the DMA device has encountered
		 * serious errors; in that case, please stop the async data
		 * path and check what has happened to the DMA device.
		 */
		if (unlikely(copy_idx < 0)) {
			if (!vhost_async_dma_copy_log) {
				VHOST_LOG_DATA(dev->ifname, ERR,
					"DMA copy failed for channel %d:%u\n",
					dma_id, vchan_id);
				vhost_async_dma_copy_log = true;
			}
			return -1;
		}
	}

	/**
	 * Only store the packet completion flag address in the last copy's
	 * slot; the other slots are set to NULL.
	 */
	dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx];

	return nr_segs;
}

static __rte_always_inline uint16_t
vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,
		int16_t dma_id, uint16_t vchan_id, uint16_t head_idx,
		struct vhost_iov_iter *pkts, uint16_t nr_pkts)
{
	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
	int64_t ret, nr_copies = 0;
	uint16_t pkt_idx;

	rte_spinlock_lock(&dma_info->dma_lock);

	for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) {
		ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx,
				&pkts[pkt_idx]);
		if (unlikely(ret < 0))
			break;

		nr_copies += ret;
		head_idx++;
		if (head_idx >= vq->size)
			head_idx -= vq->size;
	}

	if (likely(nr_copies > 0))
		rte_dma_submit(dma_id, vchan_id);

	rte_spinlock_unlock(&dma_info->dma_lock);

	return pkt_idx;
}

static __rte_always_inline uint16_t
vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id,
		uint16_t max_pkts)
{
	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
	uint16_t ring_mask = dma_info->ring_mask;
	uint16_t last_idx = 0;
	uint16_t nr_copies;
	uint16_t copy_idx;
	uint16_t i;
	bool has_error = false;
	static bool vhost_async_dma_complete_log;

	rte_spinlock_lock(&dma_info->dma_lock);

	/**
	 * Print an error log for debugging if the DMA device reports an
	 * error during the transfer. Errors are not handled at the vhost
	 * level.
	 */
	nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error);
	if (unlikely(!vhost_async_dma_complete_log && has_error)) {
		VHOST_LOG_DATA(dev->ifname, ERR,
				"DMA completion failure on channel %d:%u\n",
				dma_id, vchan_id);
		vhost_async_dma_complete_log = true;
	} else if (nr_copies == 0) {
		goto out;
	}

	copy_idx = last_idx - nr_copies + 1;
	for (i = 0; i < nr_copies; i++) {
		bool *flag;

		flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask];
		if (flag) {
			/**
			 * Mark the packet flag as received. The flag
			 * could belong to another virtqueue but the
			 * write is atomic.
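			 * (A DMA vChannel may be shared by several virtqueues,
			 * so completions polled here can finish copies that
			 * were submitted for a different ring.)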
222 */ 223 *flag = true; 224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 225 } 226 copy_idx++; 227 } 228 229 out: 230 rte_spinlock_unlock(&dma_info->dma_lock); 231 return nr_copies; 232 } 233 234 static inline void 235 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 236 { 237 struct batch_copy_elem *elem = vq->batch_copy_elems; 238 uint16_t count = vq->batch_copy_nb_elems; 239 int i; 240 241 for (i = 0; i < count; i++) { 242 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 243 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 244 elem[i].len); 245 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 246 } 247 248 vq->batch_copy_nb_elems = 0; 249 } 250 251 static inline void 252 do_data_copy_dequeue(struct vhost_virtqueue *vq) 253 { 254 struct batch_copy_elem *elem = vq->batch_copy_elems; 255 uint16_t count = vq->batch_copy_nb_elems; 256 int i; 257 258 for (i = 0; i < count; i++) 259 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 260 261 vq->batch_copy_nb_elems = 0; 262 } 263 264 static __rte_always_inline void 265 do_flush_shadow_used_ring_split(struct virtio_net *dev, 266 struct vhost_virtqueue *vq, 267 uint16_t to, uint16_t from, uint16_t size) 268 { 269 rte_memcpy(&vq->used->ring[to], 270 &vq->shadow_used_split[from], 271 size * sizeof(struct vring_used_elem)); 272 vhost_log_cache_used_vring(dev, vq, 273 offsetof(struct vring_used, ring[to]), 274 size * sizeof(struct vring_used_elem)); 275 } 276 277 static __rte_always_inline void 278 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 279 { 280 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 281 282 if (used_idx + vq->shadow_used_idx <= vq->size) { 283 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 284 vq->shadow_used_idx); 285 } else { 286 uint16_t size; 287 288 /* update used ring interval [used_idx, vq->size] */ 289 size = vq->size - used_idx; 290 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 291 292 /* update the left half used ring interval [0, left_size] */ 293 do_flush_shadow_used_ring_split(dev, vq, 0, size, 294 vq->shadow_used_idx - size); 295 } 296 vq->last_used_idx += vq->shadow_used_idx; 297 298 vhost_log_cache_sync(dev, vq); 299 300 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 301 __ATOMIC_RELEASE); 302 vq->shadow_used_idx = 0; 303 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 304 sizeof(vq->used->idx)); 305 } 306 307 static __rte_always_inline void 308 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 309 uint16_t desc_idx, uint32_t len) 310 { 311 uint16_t i = vq->shadow_used_idx++; 312 313 vq->shadow_used_split[i].id = desc_idx; 314 vq->shadow_used_split[i].len = len; 315 } 316 317 static __rte_always_inline void 318 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 319 struct vhost_virtqueue *vq) 320 { 321 int i; 322 uint16_t used_idx = vq->last_used_idx; 323 uint16_t head_idx = vq->last_used_idx; 324 uint16_t head_flags = 0; 325 326 /* Split loop in two to save memory barriers */ 327 for (i = 0; i < vq->shadow_used_idx; i++) { 328 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 329 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 330 331 used_idx += vq->shadow_used_packed[i].count; 332 if (used_idx >= vq->size) 333 used_idx -= vq->size; 334 } 335 336 /* The ordering for storing desc flags needs to be enforced. 
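	 * The id/len stores above must be observed by the driver before the
	 * flags stores below, since the flags are what the driver polls to
	 * detect a used descriptor.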
*/ 337 rte_atomic_thread_fence(__ATOMIC_RELEASE); 338 339 for (i = 0; i < vq->shadow_used_idx; i++) { 340 uint16_t flags; 341 342 if (vq->shadow_used_packed[i].len) 343 flags = VRING_DESC_F_WRITE; 344 else 345 flags = 0; 346 347 if (vq->used_wrap_counter) { 348 flags |= VRING_DESC_F_USED; 349 flags |= VRING_DESC_F_AVAIL; 350 } else { 351 flags &= ~VRING_DESC_F_USED; 352 flags &= ~VRING_DESC_F_AVAIL; 353 } 354 355 if (i > 0) { 356 vq->desc_packed[vq->last_used_idx].flags = flags; 357 358 vhost_log_cache_used_vring(dev, vq, 359 vq->last_used_idx * 360 sizeof(struct vring_packed_desc), 361 sizeof(struct vring_packed_desc)); 362 } else { 363 head_idx = vq->last_used_idx; 364 head_flags = flags; 365 } 366 367 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 368 } 369 370 vq->desc_packed[head_idx].flags = head_flags; 371 372 vhost_log_cache_used_vring(dev, vq, 373 head_idx * 374 sizeof(struct vring_packed_desc), 375 sizeof(struct vring_packed_desc)); 376 377 vq->shadow_used_idx = 0; 378 vhost_log_cache_sync(dev, vq); 379 } 380 381 static __rte_always_inline void 382 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 383 struct vhost_virtqueue *vq) 384 { 385 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 386 387 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 388 /* desc flags is the synchronization point for virtio packed vring */ 389 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 390 used_elem->flags, __ATOMIC_RELEASE); 391 392 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 393 sizeof(struct vring_packed_desc), 394 sizeof(struct vring_packed_desc)); 395 vq->shadow_used_idx = 0; 396 vhost_log_cache_sync(dev, vq); 397 } 398 399 static __rte_always_inline void 400 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 401 struct vhost_virtqueue *vq, 402 uint64_t *lens, 403 uint16_t *ids) 404 { 405 uint16_t i; 406 uint16_t flags; 407 uint16_t last_used_idx; 408 struct vring_packed_desc *desc_base; 409 410 last_used_idx = vq->last_used_idx; 411 desc_base = &vq->desc_packed[last_used_idx]; 412 413 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 414 415 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 416 desc_base[i].id = ids[i]; 417 desc_base[i].len = lens[i]; 418 } 419 420 rte_atomic_thread_fence(__ATOMIC_RELEASE); 421 422 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 423 desc_base[i].flags = flags; 424 } 425 426 vhost_log_cache_used_vring(dev, vq, last_used_idx * 427 sizeof(struct vring_packed_desc), 428 sizeof(struct vring_packed_desc) * 429 PACKED_BATCH_SIZE); 430 vhost_log_cache_sync(dev, vq); 431 432 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 433 } 434 435 static __rte_always_inline void 436 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 437 uint16_t id) 438 { 439 vq->shadow_used_packed[0].id = id; 440 441 if (!vq->shadow_used_idx) { 442 vq->shadow_last_used_idx = vq->last_used_idx; 443 vq->shadow_used_packed[0].flags = 444 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 445 vq->shadow_used_packed[0].len = 0; 446 vq->shadow_used_packed[0].count = 1; 447 vq->shadow_used_idx++; 448 } 449 450 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 451 } 452 453 static __rte_always_inline void 454 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 455 struct vhost_virtqueue *vq, 456 uint16_t *ids) 457 { 458 uint16_t flags; 459 uint16_t i; 460 uint16_t begin; 461 462 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 463 464 if 
(!vq->shadow_used_idx) { 465 vq->shadow_last_used_idx = vq->last_used_idx; 466 vq->shadow_used_packed[0].id = ids[0]; 467 vq->shadow_used_packed[0].len = 0; 468 vq->shadow_used_packed[0].count = 1; 469 vq->shadow_used_packed[0].flags = flags; 470 vq->shadow_used_idx++; 471 begin = 1; 472 } else 473 begin = 0; 474 475 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 476 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 477 vq->desc_packed[vq->last_used_idx + i].len = 0; 478 } 479 480 rte_atomic_thread_fence(__ATOMIC_RELEASE); 481 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 482 vq->desc_packed[vq->last_used_idx + i].flags = flags; 483 484 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 485 sizeof(struct vring_packed_desc), 486 sizeof(struct vring_packed_desc) * 487 PACKED_BATCH_SIZE); 488 vhost_log_cache_sync(dev, vq); 489 490 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 491 } 492 493 static __rte_always_inline void 494 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 495 uint16_t buf_id, 496 uint16_t count) 497 { 498 uint16_t flags; 499 500 flags = vq->desc_packed[vq->last_used_idx].flags; 501 if (vq->used_wrap_counter) { 502 flags |= VRING_DESC_F_USED; 503 flags |= VRING_DESC_F_AVAIL; 504 } else { 505 flags &= ~VRING_DESC_F_USED; 506 flags &= ~VRING_DESC_F_AVAIL; 507 } 508 509 if (!vq->shadow_used_idx) { 510 vq->shadow_last_used_idx = vq->last_used_idx; 511 512 vq->shadow_used_packed[0].id = buf_id; 513 vq->shadow_used_packed[0].len = 0; 514 vq->shadow_used_packed[0].flags = flags; 515 vq->shadow_used_idx++; 516 } else { 517 vq->desc_packed[vq->last_used_idx].id = buf_id; 518 vq->desc_packed[vq->last_used_idx].len = 0; 519 vq->desc_packed[vq->last_used_idx].flags = flags; 520 } 521 522 vq_inc_last_used_packed(vq, count); 523 } 524 525 static __rte_always_inline void 526 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 527 uint16_t buf_id, 528 uint16_t count) 529 { 530 uint16_t flags; 531 532 vq->shadow_used_packed[0].id = buf_id; 533 534 flags = vq->desc_packed[vq->last_used_idx].flags; 535 if (vq->used_wrap_counter) { 536 flags |= VRING_DESC_F_USED; 537 flags |= VRING_DESC_F_AVAIL; 538 } else { 539 flags &= ~VRING_DESC_F_USED; 540 flags &= ~VRING_DESC_F_AVAIL; 541 } 542 543 if (!vq->shadow_used_idx) { 544 vq->shadow_last_used_idx = vq->last_used_idx; 545 vq->shadow_used_packed[0].len = 0; 546 vq->shadow_used_packed[0].flags = flags; 547 vq->shadow_used_idx++; 548 } 549 550 vq_inc_last_used_packed(vq, count); 551 } 552 553 static __rte_always_inline void 554 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 555 uint32_t *len, 556 uint16_t *id, 557 uint16_t *count, 558 uint16_t num_buffers) 559 { 560 uint16_t i; 561 562 for (i = 0; i < num_buffers; i++) { 563 /* enqueue shadow flush action aligned with batch num */ 564 if (!vq->shadow_used_idx) 565 vq->shadow_aligned_idx = vq->last_used_idx & 566 PACKED_BATCH_MASK; 567 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 568 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 569 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 570 vq->shadow_aligned_idx += count[i]; 571 vq->shadow_used_idx++; 572 } 573 } 574 575 static __rte_always_inline void 576 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 577 struct vhost_virtqueue *vq, 578 uint32_t *len, 579 uint16_t *id, 580 uint16_t *count, 581 uint16_t num_buffers) 582 { 583 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 584 585 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) 
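	/* Flush the batched sync copies and the shadow used ring once the
	 * accumulated descriptors cross a PACKED_BATCH_SIZE boundary. */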
{
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}
}

/* avoid the write operation when it is not necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
	uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK;

	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;

	if (csum_l4) {
		/*
		 * Pseudo-header checksum must be set as per Virtio spec.
		 *
		 * Note: We don't propagate rte_net_intel_cksum_prepare()
		 * errors, as it would have an impact on performance, and an
		 * error would mean the packet is dropped by the guest instead
		 * of being dropped here.
		 */
		rte_net_intel_cksum_prepare(m_buf);

		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;

		switch (csum_l4) {
		case RTE_MBUF_F_TX_TCP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
						cksum));
			break;
		case RTE_MBUF_F_TX_UDP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
						dgram_cksum));
			break;
		case RTE_MBUF_F_TX_SCTP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
						cksum));
			break;
		}
	} else {
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
	}

	/* IP cksum verification cannot be bypassed, so calculate it here */
	if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
		struct rte_ipv4_hdr *ipv4_hdr;

		ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
						   m_buf->l2_len);
		ipv4_hdr->hdr_checksum = 0;
		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
	}

	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4)
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
					+ m_buf->l4_len;
	} else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
		net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
			m_buf->l4_len;
	} else {
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
	}
}

static __rte_always_inline int
map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec, uint16_t *vec_idx,
		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
{
	uint16_t vec_id = *vec_idx;

	while (desc_len) {
		uint64_t desc_addr;
		uint64_t desc_chunck_len = desc_len;

		if (unlikely(vec_id >= BUF_VECTOR_MAX))
			return -1;

		desc_addr = vhost_iova_to_vva(dev, vq,
				desc_iova,
				&desc_chunck_len,
				perm);
		if (unlikely(!desc_addr))
			return -1;

		rte_prefetch0((void *)(uintptr_t)desc_addr);

		buf_vec[vec_id].buf_iova = desc_iova;
		buf_vec[vec_id].buf_addr = desc_addr;
		buf_vec[vec_id].buf_len = desc_chunck_len;

		desc_len -= desc_chunck_len;
		desc_iova += desc_chunck_len;
		vec_id++;
	}
	*vec_idx = vec_id;

	return 0;
}

static
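/*
 * fill_vec_buf_split() below walks one descriptor chain of a split virtqueue,
 * following an indirect descriptor table if present, and records every
 * guest-contiguous region as a (buf_addr, buf_iova, buf_len) entry in buf_vec[].
 */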
__rte_always_inline int 706 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 707 uint32_t avail_idx, uint16_t *vec_idx, 708 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 709 uint32_t *desc_chain_len, uint8_t perm) 710 { 711 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 712 uint16_t vec_id = *vec_idx; 713 uint32_t len = 0; 714 uint64_t dlen; 715 uint32_t nr_descs = vq->size; 716 uint32_t cnt = 0; 717 struct vring_desc *descs = vq->desc; 718 struct vring_desc *idesc = NULL; 719 720 if (unlikely(idx >= vq->size)) 721 return -1; 722 723 *desc_chain_head = idx; 724 725 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 726 dlen = vq->desc[idx].len; 727 nr_descs = dlen / sizeof(struct vring_desc); 728 if (unlikely(nr_descs > vq->size)) 729 return -1; 730 731 descs = (struct vring_desc *)(uintptr_t) 732 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 733 &dlen, 734 VHOST_ACCESS_RO); 735 if (unlikely(!descs)) 736 return -1; 737 738 if (unlikely(dlen < vq->desc[idx].len)) { 739 /* 740 * The indirect desc table is not contiguous 741 * in process VA space, we have to copy it. 742 */ 743 idesc = vhost_alloc_copy_ind_table(dev, vq, 744 vq->desc[idx].addr, vq->desc[idx].len); 745 if (unlikely(!idesc)) 746 return -1; 747 748 descs = idesc; 749 } 750 751 idx = 0; 752 } 753 754 while (1) { 755 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 756 free_ind_table(idesc); 757 return -1; 758 } 759 760 dlen = descs[idx].len; 761 len += dlen; 762 763 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 764 descs[idx].addr, dlen, 765 perm))) { 766 free_ind_table(idesc); 767 return -1; 768 } 769 770 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 771 break; 772 773 idx = descs[idx].next; 774 } 775 776 *desc_chain_len = len; 777 *vec_idx = vec_id; 778 779 if (unlikely(!!idesc)) 780 free_ind_table(idesc); 781 782 return 0; 783 } 784 785 /* 786 * Returns -1 on fail, 0 on success 787 */ 788 static inline int 789 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 790 uint32_t size, struct buf_vector *buf_vec, 791 uint16_t *num_buffers, uint16_t avail_head, 792 uint16_t *nr_vec) 793 { 794 uint16_t cur_idx; 795 uint16_t vec_idx = 0; 796 uint16_t max_tries, tries = 0; 797 798 uint16_t head_idx = 0; 799 uint32_t len = 0; 800 801 *num_buffers = 0; 802 cur_idx = vq->last_avail_idx; 803 804 if (rxvq_is_mergeable(dev)) 805 max_tries = vq->size - 1; 806 else 807 max_tries = 1; 808 809 while (size > 0) { 810 if (unlikely(cur_idx == avail_head)) 811 return -1; 812 /* 813 * if we tried all available ring items, and still 814 * can't get enough buf, it means something abnormal 815 * happened. 
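		 * With mergeable RX buffers a packet may consume up to
		 * vq->size - 1 descriptor chains; otherwise only a single
		 * chain is attempted (see max_tries above).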
816 */ 817 if (unlikely(++tries > max_tries)) 818 return -1; 819 820 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 821 &vec_idx, buf_vec, 822 &head_idx, &len, 823 VHOST_ACCESS_RW) < 0)) 824 return -1; 825 len = RTE_MIN(len, size); 826 update_shadow_used_ring_split(vq, head_idx, len); 827 size -= len; 828 829 cur_idx++; 830 *num_buffers += 1; 831 } 832 833 *nr_vec = vec_idx; 834 835 return 0; 836 } 837 838 static __rte_always_inline int 839 fill_vec_buf_packed_indirect(struct virtio_net *dev, 840 struct vhost_virtqueue *vq, 841 struct vring_packed_desc *desc, uint16_t *vec_idx, 842 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 843 { 844 uint16_t i; 845 uint32_t nr_descs; 846 uint16_t vec_id = *vec_idx; 847 uint64_t dlen; 848 struct vring_packed_desc *descs, *idescs = NULL; 849 850 dlen = desc->len; 851 descs = (struct vring_packed_desc *)(uintptr_t) 852 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 853 if (unlikely(!descs)) 854 return -1; 855 856 if (unlikely(dlen < desc->len)) { 857 /* 858 * The indirect desc table is not contiguous 859 * in process VA space, we have to copy it. 860 */ 861 idescs = vhost_alloc_copy_ind_table(dev, 862 vq, desc->addr, desc->len); 863 if (unlikely(!idescs)) 864 return -1; 865 866 descs = idescs; 867 } 868 869 nr_descs = desc->len / sizeof(struct vring_packed_desc); 870 if (unlikely(nr_descs >= vq->size)) { 871 free_ind_table(idescs); 872 return -1; 873 } 874 875 for (i = 0; i < nr_descs; i++) { 876 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 877 free_ind_table(idescs); 878 return -1; 879 } 880 881 dlen = descs[i].len; 882 *len += dlen; 883 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 884 descs[i].addr, dlen, 885 perm))) 886 return -1; 887 } 888 *vec_idx = vec_id; 889 890 if (unlikely(!!idescs)) 891 free_ind_table(idescs); 892 893 return 0; 894 } 895 896 static __rte_always_inline int 897 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 898 uint16_t avail_idx, uint16_t *desc_count, 899 struct buf_vector *buf_vec, uint16_t *vec_idx, 900 uint16_t *buf_id, uint32_t *len, uint8_t perm) 901 { 902 bool wrap_counter = vq->avail_wrap_counter; 903 struct vring_packed_desc *descs = vq->desc_packed; 904 uint16_t vec_id = *vec_idx; 905 uint64_t dlen; 906 907 if (avail_idx < vq->last_avail_idx) 908 wrap_counter ^= 1; 909 910 /* 911 * Perform a load-acquire barrier in desc_is_avail to 912 * enforce the ordering between desc flags and desc 913 * content. 
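	 * This pairs with the driver's store-release of the avail flags, so
	 * the descriptor's addr/len/id are only read after the flags indicate
	 * the descriptor is available.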
914 */ 915 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 916 return -1; 917 918 *desc_count = 0; 919 *len = 0; 920 921 while (1) { 922 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 923 return -1; 924 925 if (unlikely(*desc_count >= vq->size)) 926 return -1; 927 928 *desc_count += 1; 929 *buf_id = descs[avail_idx].id; 930 931 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 932 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 933 &descs[avail_idx], 934 &vec_id, buf_vec, 935 len, perm) < 0)) 936 return -1; 937 } else { 938 dlen = descs[avail_idx].len; 939 *len += dlen; 940 941 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 942 descs[avail_idx].addr, 943 dlen, 944 perm))) 945 return -1; 946 } 947 948 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 949 break; 950 951 if (++avail_idx >= vq->size) { 952 avail_idx -= vq->size; 953 wrap_counter ^= 1; 954 } 955 } 956 957 *vec_idx = vec_id; 958 959 return 0; 960 } 961 962 static __rte_noinline void 963 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 964 struct buf_vector *buf_vec, 965 struct virtio_net_hdr_mrg_rxbuf *hdr) 966 { 967 uint64_t len; 968 uint64_t remain = dev->vhost_hlen; 969 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 970 uint64_t iova = buf_vec->buf_iova; 971 972 while (remain) { 973 len = RTE_MIN(remain, 974 buf_vec->buf_len); 975 dst = buf_vec->buf_addr; 976 rte_memcpy((void *)(uintptr_t)dst, 977 (void *)(uintptr_t)src, 978 len); 979 980 PRINT_PACKET(dev, (uintptr_t)dst, 981 (uint32_t)len, 0); 982 vhost_log_cache_write_iova(dev, vq, 983 iova, len); 984 985 remain -= len; 986 iova += len; 987 src += len; 988 buf_vec++; 989 } 990 } 991 992 static __rte_always_inline int 993 async_iter_initialize(struct virtio_net *dev, struct vhost_async *async) 994 { 995 struct vhost_iov_iter *iter; 996 997 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 998 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 999 return -1; 1000 } 1001 1002 iter = async->iov_iter + async->iter_idx; 1003 iter->iov = async->iovec + async->iovec_idx; 1004 iter->nr_segs = 0; 1005 1006 return 0; 1007 } 1008 1009 static __rte_always_inline int 1010 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1011 void *src, void *dst, size_t len) 1012 { 1013 struct vhost_iov_iter *iter; 1014 struct vhost_iovec *iovec; 1015 1016 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1017 static bool vhost_max_async_vec_log; 1018 1019 if (!vhost_max_async_vec_log) { 1020 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1021 vhost_max_async_vec_log = true; 1022 } 1023 1024 return -1; 1025 } 1026 1027 iter = async->iov_iter + async->iter_idx; 1028 iovec = async->iovec + async->iovec_idx; 1029 1030 iovec->src_addr = src; 1031 iovec->dst_addr = dst; 1032 iovec->len = len; 1033 1034 iter->nr_segs++; 1035 async->iovec_idx++; 1036 1037 return 0; 1038 } 1039 1040 static __rte_always_inline void 1041 async_iter_finalize(struct vhost_async *async) 1042 { 1043 async->iter_idx++; 1044 } 1045 1046 static __rte_always_inline void 1047 async_iter_cancel(struct vhost_async *async) 1048 { 1049 struct vhost_iov_iter *iter; 1050 1051 iter = async->iov_iter + async->iter_idx; 1052 async->iovec_idx -= iter->nr_segs; 1053 iter->nr_segs = 0; 1054 iter->iov = NULL; 1055 } 1056 1057 static __rte_always_inline void 1058 async_iter_reset(struct vhost_async *async) 1059 { 1060 async->iter_idx = 0; 1061 async->iovec_idx = 0; 1062 } 1063 1064 static __rte_always_inline int 1065 
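/* async_fill_seg(): translate the guest buffer range into host-contiguous
 * chunks via gpa_to_first_hpa() and record each chunk as one src/dst iovec
 * for the DMA engine; the copy direction is selected by to_desc. */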
async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1066 struct rte_mbuf *m, uint32_t mbuf_offset, 1067 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1068 { 1069 struct vhost_async *async = vq->async; 1070 uint64_t mapped_len; 1071 uint32_t buf_offset = 0; 1072 void *src, *dst; 1073 void *host_iova; 1074 1075 while (cpy_len) { 1076 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1077 buf_iova + buf_offset, cpy_len, &mapped_len); 1078 if (unlikely(!host_iova)) { 1079 VHOST_LOG_DATA(dev->ifname, ERR, 1080 "%s: failed to get host iova.\n", 1081 __func__); 1082 return -1; 1083 } 1084 1085 if (to_desc) { 1086 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1087 dst = host_iova; 1088 } else { 1089 src = host_iova; 1090 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1091 } 1092 1093 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1094 return -1; 1095 1096 cpy_len -= (uint32_t)mapped_len; 1097 mbuf_offset += (uint32_t)mapped_len; 1098 buf_offset += (uint32_t)mapped_len; 1099 } 1100 1101 return 0; 1102 } 1103 1104 static __rte_always_inline void 1105 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1106 struct rte_mbuf *m, uint32_t mbuf_offset, 1107 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1108 { 1109 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1110 1111 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1112 if (to_desc) { 1113 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1114 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1115 cpy_len); 1116 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1117 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1118 } else { 1119 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1120 (void *)((uintptr_t)(buf_addr)), 1121 cpy_len); 1122 } 1123 } else { 1124 if (to_desc) { 1125 batch_copy[vq->batch_copy_nb_elems].dst = 1126 (void *)((uintptr_t)(buf_addr)); 1127 batch_copy[vq->batch_copy_nb_elems].src = 1128 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1129 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1130 } else { 1131 batch_copy[vq->batch_copy_nb_elems].dst = 1132 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1133 batch_copy[vq->batch_copy_nb_elems].src = 1134 (void *)((uintptr_t)(buf_addr)); 1135 } 1136 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1137 vq->batch_copy_nb_elems++; 1138 } 1139 } 1140 1141 static __rte_always_inline int 1142 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1143 struct rte_mbuf *m, struct buf_vector *buf_vec, 1144 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1145 { 1146 uint32_t vec_idx = 0; 1147 uint32_t mbuf_offset, mbuf_avail; 1148 uint32_t buf_offset, buf_avail; 1149 uint64_t buf_addr, buf_iova, buf_len; 1150 uint32_t cpy_len; 1151 uint64_t hdr_addr; 1152 struct rte_mbuf *hdr_mbuf; 1153 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1154 struct vhost_async *async = vq->async; 1155 1156 if (unlikely(m == NULL)) 1157 return -1; 1158 1159 buf_addr = buf_vec[vec_idx].buf_addr; 1160 buf_iova = buf_vec[vec_idx].buf_iova; 1161 buf_len = buf_vec[vec_idx].buf_len; 1162 1163 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1164 return -1; 1165 1166 hdr_mbuf = m; 1167 hdr_addr = buf_addr; 1168 if (unlikely(buf_len < dev->vhost_hlen)) { 1169 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1170 hdr = &tmp_hdr; 1171 } else 1172 hdr = (struct virtio_net_hdr_mrg_rxbuf 
*)(uintptr_t)hdr_addr; 1173 1174 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers); 1175 1176 if (unlikely(buf_len < dev->vhost_hlen)) { 1177 buf_offset = dev->vhost_hlen - buf_len; 1178 vec_idx++; 1179 buf_addr = buf_vec[vec_idx].buf_addr; 1180 buf_iova = buf_vec[vec_idx].buf_iova; 1181 buf_len = buf_vec[vec_idx].buf_len; 1182 buf_avail = buf_len - buf_offset; 1183 } else { 1184 buf_offset = dev->vhost_hlen; 1185 buf_avail = buf_len - dev->vhost_hlen; 1186 } 1187 1188 mbuf_avail = rte_pktmbuf_data_len(m); 1189 mbuf_offset = 0; 1190 1191 if (is_async) { 1192 if (async_iter_initialize(dev, async)) 1193 return -1; 1194 } 1195 1196 while (mbuf_avail != 0 || m->next != NULL) { 1197 /* done with current buf, get the next one */ 1198 if (buf_avail == 0) { 1199 vec_idx++; 1200 if (unlikely(vec_idx >= nr_vec)) 1201 goto error; 1202 1203 buf_addr = buf_vec[vec_idx].buf_addr; 1204 buf_iova = buf_vec[vec_idx].buf_iova; 1205 buf_len = buf_vec[vec_idx].buf_len; 1206 1207 buf_offset = 0; 1208 buf_avail = buf_len; 1209 } 1210 1211 /* done with current mbuf, get the next one */ 1212 if (mbuf_avail == 0) { 1213 m = m->next; 1214 1215 mbuf_offset = 0; 1216 mbuf_avail = rte_pktmbuf_data_len(m); 1217 } 1218 1219 if (hdr_addr) { 1220 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1221 if (rxvq_is_mergeable(dev)) 1222 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1223 num_buffers); 1224 1225 if (unlikely(hdr == &tmp_hdr)) { 1226 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1227 } else { 1228 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1229 dev->vhost_hlen, 0); 1230 vhost_log_cache_write_iova(dev, vq, 1231 buf_vec[0].buf_iova, 1232 dev->vhost_hlen); 1233 } 1234 1235 hdr_addr = 0; 1236 } 1237 1238 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1239 1240 if (is_async) { 1241 if (async_fill_seg(dev, vq, m, mbuf_offset, 1242 buf_iova + buf_offset, cpy_len, true) < 0) 1243 goto error; 1244 } else { 1245 sync_fill_seg(dev, vq, m, mbuf_offset, 1246 buf_addr + buf_offset, 1247 buf_iova + buf_offset, cpy_len, true); 1248 } 1249 1250 mbuf_avail -= cpy_len; 1251 mbuf_offset += cpy_len; 1252 buf_avail -= cpy_len; 1253 buf_offset += cpy_len; 1254 } 1255 1256 if (is_async) 1257 async_iter_finalize(async); 1258 1259 return 0; 1260 error: 1261 if (is_async) 1262 async_iter_cancel(async); 1263 1264 return -1; 1265 } 1266 1267 static __rte_always_inline int 1268 vhost_enqueue_single_packed(struct virtio_net *dev, 1269 struct vhost_virtqueue *vq, 1270 struct rte_mbuf *pkt, 1271 struct buf_vector *buf_vec, 1272 uint16_t *nr_descs) 1273 { 1274 uint16_t nr_vec = 0; 1275 uint16_t avail_idx = vq->last_avail_idx; 1276 uint16_t max_tries, tries = 0; 1277 uint16_t buf_id = 0; 1278 uint32_t len = 0; 1279 uint16_t desc_count; 1280 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1281 uint16_t num_buffers = 0; 1282 uint32_t buffer_len[vq->size]; 1283 uint16_t buffer_buf_id[vq->size]; 1284 uint16_t buffer_desc_count[vq->size]; 1285 1286 if (rxvq_is_mergeable(dev)) 1287 max_tries = vq->size - 1; 1288 else 1289 max_tries = 1; 1290 1291 while (size > 0) { 1292 /* 1293 * if we tried all available ring items, and still 1294 * can't get enough buf, it means something abnormal 1295 * happened. 
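		 * In the mergeable case the packet may span several descriptor
		 * chains; each chain's id, length and descriptor count is
		 * recorded so the shadow used ring can be filled afterwards.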
1296 */ 1297 if (unlikely(++tries > max_tries)) 1298 return -1; 1299 1300 if (unlikely(fill_vec_buf_packed(dev, vq, 1301 avail_idx, &desc_count, 1302 buf_vec, &nr_vec, 1303 &buf_id, &len, 1304 VHOST_ACCESS_RW) < 0)) 1305 return -1; 1306 1307 len = RTE_MIN(len, size); 1308 size -= len; 1309 1310 buffer_len[num_buffers] = len; 1311 buffer_buf_id[num_buffers] = buf_id; 1312 buffer_desc_count[num_buffers] = desc_count; 1313 num_buffers += 1; 1314 1315 *nr_descs += desc_count; 1316 avail_idx += desc_count; 1317 if (avail_idx >= vq->size) 1318 avail_idx -= vq->size; 1319 } 1320 1321 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0) 1322 return -1; 1323 1324 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1325 buffer_desc_count, num_buffers); 1326 1327 return 0; 1328 } 1329 1330 static __rte_noinline uint32_t 1331 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1332 struct rte_mbuf **pkts, uint32_t count) 1333 { 1334 uint32_t pkt_idx = 0; 1335 uint16_t num_buffers; 1336 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1337 uint16_t avail_head; 1338 1339 /* 1340 * The ordering between avail index and 1341 * desc reads needs to be enforced. 1342 */ 1343 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1344 1345 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1346 1347 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1348 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1349 uint16_t nr_vec = 0; 1350 1351 if (unlikely(reserve_avail_buf_split(dev, vq, 1352 pkt_len, buf_vec, &num_buffers, 1353 avail_head, &nr_vec) < 0)) { 1354 VHOST_LOG_DATA(dev->ifname, DEBUG, 1355 "failed to get enough desc from vring\n"); 1356 vq->shadow_used_idx -= num_buffers; 1357 break; 1358 } 1359 1360 VHOST_LOG_DATA(dev->ifname, DEBUG, 1361 "current index %d | end index %d\n", 1362 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1363 1364 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1365 num_buffers, false) < 0) { 1366 vq->shadow_used_idx -= num_buffers; 1367 break; 1368 } 1369 1370 vq->last_avail_idx += num_buffers; 1371 } 1372 1373 do_data_copy_enqueue(dev, vq); 1374 1375 if (likely(vq->shadow_used_idx)) { 1376 flush_shadow_used_ring_split(dev, vq); 1377 vhost_vring_call_split(dev, vq); 1378 } 1379 1380 return pkt_idx; 1381 } 1382 1383 static __rte_always_inline int 1384 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1385 struct vhost_virtqueue *vq, 1386 struct rte_mbuf **pkts, 1387 uint64_t *desc_addrs, 1388 uint64_t *lens) 1389 { 1390 bool wrap_counter = vq->avail_wrap_counter; 1391 struct vring_packed_desc *descs = vq->desc_packed; 1392 uint16_t avail_idx = vq->last_avail_idx; 1393 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1394 uint16_t i; 1395 1396 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1397 return -1; 1398 1399 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1400 return -1; 1401 1402 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1403 if (unlikely(pkts[i]->next != NULL)) 1404 return -1; 1405 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1406 wrap_counter))) 1407 return -1; 1408 } 1409 1410 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1411 lens[i] = descs[avail_idx + i].len; 1412 1413 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1414 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1415 return -1; 1416 } 1417 1418 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1419 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 
1420 descs[avail_idx + i].addr, 1421 &lens[i], 1422 VHOST_ACCESS_RW); 1423 1424 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1425 if (unlikely(!desc_addrs[i])) 1426 return -1; 1427 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1428 return -1; 1429 } 1430 1431 return 0; 1432 } 1433 1434 static __rte_always_inline void 1435 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1436 struct vhost_virtqueue *vq, 1437 struct rte_mbuf **pkts, 1438 uint64_t *desc_addrs, 1439 uint64_t *lens) 1440 { 1441 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1442 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1443 struct vring_packed_desc *descs = vq->desc_packed; 1444 uint16_t avail_idx = vq->last_avail_idx; 1445 uint16_t ids[PACKED_BATCH_SIZE]; 1446 uint16_t i; 1447 1448 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1449 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1450 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1451 (uintptr_t)desc_addrs[i]; 1452 lens[i] = pkts[i]->pkt_len + 1453 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1454 } 1455 1456 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1457 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1458 1459 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1460 1461 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1462 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1463 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1464 pkts[i]->pkt_len); 1465 } 1466 1467 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1468 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1469 lens[i]); 1470 1471 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1472 ids[i] = descs[avail_idx + i].id; 1473 1474 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1475 } 1476 1477 static __rte_always_inline int 1478 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1479 struct vhost_virtqueue *vq, 1480 struct rte_mbuf **pkts) 1481 { 1482 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1483 uint64_t lens[PACKED_BATCH_SIZE]; 1484 1485 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1486 return -1; 1487 1488 if (vq->shadow_used_idx) { 1489 do_data_copy_enqueue(dev, vq); 1490 vhost_flush_enqueue_shadow_packed(dev, vq); 1491 } 1492 1493 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1494 1495 return 0; 1496 } 1497 1498 static __rte_always_inline int16_t 1499 virtio_dev_rx_single_packed(struct virtio_net *dev, 1500 struct vhost_virtqueue *vq, 1501 struct rte_mbuf *pkt) 1502 { 1503 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1504 uint16_t nr_descs = 0; 1505 1506 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1507 &nr_descs) < 0)) { 1508 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1509 return -1; 1510 } 1511 1512 VHOST_LOG_DATA(dev->ifname, DEBUG, 1513 "current index %d | end index %d\n", 1514 vq->last_avail_idx, vq->last_avail_idx + nr_descs); 1515 1516 vq_inc_last_avail_packed(vq, nr_descs); 1517 1518 return 0; 1519 } 1520 1521 static __rte_noinline uint32_t 1522 virtio_dev_rx_packed(struct virtio_net *dev, 1523 struct vhost_virtqueue *__rte_restrict vq, 1524 struct rte_mbuf **__rte_restrict pkts, 1525 uint32_t count) 1526 { 1527 uint32_t pkt_idx = 0; 1528 1529 do { 1530 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1531 1532 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1533 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1534 &pkts[pkt_idx])) { 1535 pkt_idx += PACKED_BATCH_SIZE; 1536 continue; 1537 } 1538 } 1539 1540 
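		/* The batch path was not possible (descriptors not yet
		 * available, unaligned index, ring wrap or chained mbufs),
		 * or fewer than PACKED_BATCH_SIZE packets remain: fall back
		 * to single-packet enqueue. */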
if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1541 break; 1542 pkt_idx++; 1543 1544 } while (pkt_idx < count); 1545 1546 if (vq->shadow_used_idx) { 1547 do_data_copy_enqueue(dev, vq); 1548 vhost_flush_enqueue_shadow_packed(dev, vq); 1549 } 1550 1551 if (pkt_idx) 1552 vhost_vring_call_packed(dev, vq); 1553 1554 return pkt_idx; 1555 } 1556 1557 static __rte_always_inline uint32_t 1558 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1559 struct rte_mbuf **pkts, uint32_t count) 1560 { 1561 struct vhost_virtqueue *vq; 1562 uint32_t nb_tx = 0; 1563 1564 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 1565 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1566 VHOST_LOG_DATA(dev->ifname, ERR, 1567 "%s: invalid virtqueue idx %d.\n", 1568 __func__, queue_id); 1569 return 0; 1570 } 1571 1572 vq = dev->virtqueue[queue_id]; 1573 1574 rte_spinlock_lock(&vq->access_lock); 1575 1576 if (unlikely(!vq->enabled)) 1577 goto out_access_unlock; 1578 1579 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1580 vhost_user_iotlb_rd_lock(vq); 1581 1582 if (unlikely(!vq->access_ok)) 1583 if (unlikely(vring_translate(dev, vq) < 0)) 1584 goto out; 1585 1586 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1587 if (count == 0) 1588 goto out; 1589 1590 if (vq_is_packed(dev)) 1591 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1592 else 1593 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1594 1595 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1596 1597 out: 1598 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1599 vhost_user_iotlb_rd_unlock(vq); 1600 1601 out_access_unlock: 1602 rte_spinlock_unlock(&vq->access_lock); 1603 1604 return nb_tx; 1605 } 1606 1607 uint16_t 1608 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1609 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1610 { 1611 struct virtio_net *dev = get_device(vid); 1612 1613 if (!dev) 1614 return 0; 1615 1616 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1617 VHOST_LOG_DATA(dev->ifname, ERR, 1618 "%s: built-in vhost net backend is disabled.\n", 1619 __func__); 1620 return 0; 1621 } 1622 1623 return virtio_dev_rx(dev, queue_id, pkts, count); 1624 } 1625 1626 static __rte_always_inline uint16_t 1627 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1628 { 1629 struct vhost_async *async = vq->async; 1630 1631 if (async->pkts_idx >= async->pkts_inflight_n) 1632 return async->pkts_idx - async->pkts_inflight_n; 1633 else 1634 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1635 } 1636 1637 static __rte_always_inline void 1638 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1639 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1640 { 1641 size_t elem_size = sizeof(struct vring_used_elem); 1642 1643 if (d_idx + count <= ring_size) { 1644 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1645 } else { 1646 uint16_t size = ring_size - d_idx; 1647 1648 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1649 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1650 } 1651 } 1652 1653 static __rte_always_inline void 1654 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1655 struct vring_used_elem_packed *d_ring, 1656 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1657 { 1658 size_t elem_size = sizeof(struct vring_used_elem_packed); 1659 1660 if (d_idx + count <= ring_size) { 1661 rte_memcpy(d_ring + d_idx, s_ring + 
s_idx, count * elem_size); 1662 } else { 1663 uint16_t size = ring_size - d_idx; 1664 1665 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1666 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1667 } 1668 } 1669 1670 static __rte_noinline uint32_t 1671 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1672 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, 1673 int16_t dma_id, uint16_t vchan_id) 1674 { 1675 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1676 uint32_t pkt_idx = 0; 1677 uint16_t num_buffers; 1678 uint16_t avail_head; 1679 1680 struct vhost_async *async = vq->async; 1681 struct async_inflight_info *pkts_info = async->pkts_info; 1682 uint32_t pkt_err = 0; 1683 uint16_t n_xfer; 1684 uint16_t slot_idx = 0; 1685 1686 /* 1687 * The ordering between avail index and desc reads need to be enforced. 1688 */ 1689 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1690 1691 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1692 1693 async_iter_reset(async); 1694 1695 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1696 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1697 uint16_t nr_vec = 0; 1698 1699 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1700 &num_buffers, avail_head, &nr_vec) < 0)) { 1701 VHOST_LOG_DATA(dev->ifname, DEBUG, 1702 "failed to get enough desc from vring\n"); 1703 vq->shadow_used_idx -= num_buffers; 1704 break; 1705 } 1706 1707 VHOST_LOG_DATA(dev->ifname, DEBUG, 1708 "current index %d | end index %d\n", 1709 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1710 1711 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1712 vq->shadow_used_idx -= num_buffers; 1713 break; 1714 } 1715 1716 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1717 pkts_info[slot_idx].descs = num_buffers; 1718 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1719 1720 vq->last_avail_idx += num_buffers; 1721 } 1722 1723 if (unlikely(pkt_idx == 0)) 1724 return 0; 1725 1726 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1727 async->iov_iter, pkt_idx); 1728 1729 pkt_err = pkt_idx - n_xfer; 1730 if (unlikely(pkt_err)) { 1731 uint16_t num_descs = 0; 1732 1733 VHOST_LOG_DATA(dev->ifname, DEBUG, 1734 "%s: failed to transfer %u packets for queue %u.\n", 1735 __func__, pkt_err, queue_id); 1736 1737 /* update number of completed packets */ 1738 pkt_idx = n_xfer; 1739 1740 /* calculate the sum of descriptors to revert */ 1741 while (pkt_err-- > 0) { 1742 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1743 slot_idx--; 1744 } 1745 1746 /* recover shadow used ring and available ring */ 1747 vq->shadow_used_idx -= num_descs; 1748 vq->last_avail_idx -= num_descs; 1749 } 1750 1751 /* keep used descriptors */ 1752 if (likely(vq->shadow_used_idx)) { 1753 uint16_t to = async->desc_idx_split & (vq->size - 1); 1754 1755 store_dma_desc_info_split(vq->shadow_used_split, 1756 async->descs_split, vq->size, 0, to, 1757 vq->shadow_used_idx); 1758 1759 async->desc_idx_split += vq->shadow_used_idx; 1760 1761 async->pkts_idx += pkt_idx; 1762 if (async->pkts_idx >= vq->size) 1763 async->pkts_idx -= vq->size; 1764 1765 async->pkts_inflight_n += pkt_idx; 1766 vq->shadow_used_idx = 0; 1767 } 1768 1769 return pkt_idx; 1770 } 1771 1772 1773 static __rte_always_inline int 1774 vhost_enqueue_async_packed(struct virtio_net *dev, 1775 struct vhost_virtqueue *vq, 1776 struct rte_mbuf *pkt, 1777 struct buf_vector 
*buf_vec, 1778 uint16_t *nr_descs, 1779 uint16_t *nr_buffers) 1780 { 1781 uint16_t nr_vec = 0; 1782 uint16_t avail_idx = vq->last_avail_idx; 1783 uint16_t max_tries, tries = 0; 1784 uint16_t buf_id = 0; 1785 uint32_t len = 0; 1786 uint16_t desc_count = 0; 1787 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1788 uint32_t buffer_len[vq->size]; 1789 uint16_t buffer_buf_id[vq->size]; 1790 uint16_t buffer_desc_count[vq->size]; 1791 1792 if (rxvq_is_mergeable(dev)) 1793 max_tries = vq->size - 1; 1794 else 1795 max_tries = 1; 1796 1797 while (size > 0) { 1798 /* 1799 * if we tried all available ring items, and still 1800 * can't get enough buf, it means something abnormal 1801 * happened. 1802 */ 1803 if (unlikely(++tries > max_tries)) 1804 return -1; 1805 1806 if (unlikely(fill_vec_buf_packed(dev, vq, 1807 avail_idx, &desc_count, 1808 buf_vec, &nr_vec, 1809 &buf_id, &len, 1810 VHOST_ACCESS_RW) < 0)) 1811 return -1; 1812 1813 len = RTE_MIN(len, size); 1814 size -= len; 1815 1816 buffer_len[*nr_buffers] = len; 1817 buffer_buf_id[*nr_buffers] = buf_id; 1818 buffer_desc_count[*nr_buffers] = desc_count; 1819 *nr_buffers += 1; 1820 *nr_descs += desc_count; 1821 avail_idx += desc_count; 1822 if (avail_idx >= vq->size) 1823 avail_idx -= vq->size; 1824 } 1825 1826 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1827 return -1; 1828 1829 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1830 1831 return 0; 1832 } 1833 1834 static __rte_always_inline int16_t 1835 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1836 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1837 { 1838 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1839 1840 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1841 nr_descs, nr_buffers) < 0)) { 1842 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1843 return -1; 1844 } 1845 1846 VHOST_LOG_DATA(dev->ifname, DEBUG, 1847 "current index %d | end index %d\n", 1848 vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1849 1850 return 0; 1851 } 1852 1853 static __rte_always_inline void 1854 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 1855 uint32_t nr_err, uint32_t *pkt_idx) 1856 { 1857 uint16_t descs_err = 0; 1858 uint16_t buffers_err = 0; 1859 struct async_inflight_info *pkts_info = vq->async->pkts_info; 1860 1861 *pkt_idx -= nr_err; 1862 /* calculate the sum of buffers and descs of DMA-error packets. 
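	 * Walk backwards from the last submitted slot so that last_avail_idx
	 * and the shadow used index can be rewound by exactly the amount the
	 * failed packets consumed.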
*/ 1863 while (nr_err-- > 0) { 1864 descs_err += pkts_info[slot_idx % vq->size].descs; 1865 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1866 slot_idx--; 1867 } 1868 1869 if (vq->last_avail_idx >= descs_err) { 1870 vq->last_avail_idx -= descs_err; 1871 } else { 1872 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1873 vq->avail_wrap_counter ^= 1; 1874 } 1875 1876 vq->shadow_used_idx -= buffers_err; 1877 } 1878 1879 static __rte_noinline uint32_t 1880 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1881 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, 1882 int16_t dma_id, uint16_t vchan_id) 1883 { 1884 uint32_t pkt_idx = 0; 1885 uint32_t remained = count; 1886 uint16_t n_xfer; 1887 uint16_t num_buffers; 1888 uint16_t num_descs; 1889 1890 struct vhost_async *async = vq->async; 1891 struct async_inflight_info *pkts_info = async->pkts_info; 1892 uint32_t pkt_err = 0; 1893 uint16_t slot_idx = 0; 1894 1895 do { 1896 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1897 1898 num_buffers = 0; 1899 num_descs = 0; 1900 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 1901 &num_descs, &num_buffers) < 0)) 1902 break; 1903 1904 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 1905 1906 pkts_info[slot_idx].descs = num_descs; 1907 pkts_info[slot_idx].nr_buffers = num_buffers; 1908 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1909 1910 pkt_idx++; 1911 remained--; 1912 vq_inc_last_avail_packed(vq, num_descs); 1913 } while (pkt_idx < count); 1914 1915 if (unlikely(pkt_idx == 0)) 1916 return 0; 1917 1918 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1919 async->iov_iter, pkt_idx); 1920 1921 async_iter_reset(async); 1922 1923 pkt_err = pkt_idx - n_xfer; 1924 if (unlikely(pkt_err)) { 1925 VHOST_LOG_DATA(dev->ifname, DEBUG, 1926 "%s: failed to transfer %u packets for queue %u.\n", 1927 __func__, pkt_err, queue_id); 1928 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 1929 } 1930 1931 if (likely(vq->shadow_used_idx)) { 1932 /* keep used descriptors. */ 1933 store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed, 1934 vq->size, 0, async->buffer_idx_packed, 1935 vq->shadow_used_idx); 1936 1937 async->buffer_idx_packed += vq->shadow_used_idx; 1938 if (async->buffer_idx_packed >= vq->size) 1939 async->buffer_idx_packed -= vq->size; 1940 1941 async->pkts_idx += pkt_idx; 1942 if (async->pkts_idx >= vq->size) 1943 async->pkts_idx -= vq->size; 1944 1945 vq->shadow_used_idx = 0; 1946 async->pkts_inflight_n += pkt_idx; 1947 } 1948 1949 return pkt_idx; 1950 } 1951 1952 static __rte_always_inline void 1953 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 1954 { 1955 struct vhost_async *async = vq->async; 1956 uint16_t nr_left = n_descs; 1957 uint16_t nr_copy; 1958 uint16_t to, from; 1959 1960 do { 1961 from = async->last_desc_idx_split & (vq->size - 1); 1962 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 1963 to = vq->last_used_idx & (vq->size - 1); 1964 1965 if (to + nr_copy <= vq->size) { 1966 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1967 nr_copy * sizeof(struct vring_used_elem)); 1968 } else { 1969 uint16_t size = vq->size - to; 1970 1971 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1972 size * sizeof(struct vring_used_elem)); 1973 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 1974 (nr_copy - size) * sizeof(struct vring_used_elem)); 1975 } 1976 1977 async->last_desc_idx_split += nr_copy; 1978 vq->last_used_idx += nr_copy; 1979 nr_left -= nr_copy; 1980 } while (nr_left > 0); 1981 } 1982 1983 static __rte_always_inline void 1984 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 1985 uint16_t n_buffers) 1986 { 1987 struct vhost_async *async = vq->async; 1988 uint16_t from = async->last_buffer_idx_packed; 1989 uint16_t used_idx = vq->last_used_idx; 1990 uint16_t head_idx = vq->last_used_idx; 1991 uint16_t head_flags = 0; 1992 uint16_t i; 1993 1994 /* Split loop in two to save memory barriers */ 1995 for (i = 0; i < n_buffers; i++) { 1996 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 1997 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 1998 1999 used_idx += async->buffers_packed[from].count; 2000 if (used_idx >= vq->size) 2001 used_idx -= vq->size; 2002 2003 from++; 2004 if (from >= vq->size) 2005 from = 0; 2006 } 2007 2008 /* The ordering for storing desc flags needs to be enforced. */ 2009 rte_atomic_thread_fence(__ATOMIC_RELEASE); 2010 2011 from = async->last_buffer_idx_packed; 2012 2013 for (i = 0; i < n_buffers; i++) { 2014 uint16_t flags; 2015 2016 if (async->buffers_packed[from].len) 2017 flags = VRING_DESC_F_WRITE; 2018 else 2019 flags = 0; 2020 2021 if (vq->used_wrap_counter) { 2022 flags |= VRING_DESC_F_USED; 2023 flags |= VRING_DESC_F_AVAIL; 2024 } else { 2025 flags &= ~VRING_DESC_F_USED; 2026 flags &= ~VRING_DESC_F_AVAIL; 2027 } 2028 2029 if (i > 0) { 2030 vq->desc_packed[vq->last_used_idx].flags = flags; 2031 } else { 2032 head_idx = vq->last_used_idx; 2033 head_flags = flags; 2034 } 2035 2036 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2037 2038 from++; 2039 if (from == vq->size) 2040 from = 0; 2041 } 2042 2043 vq->desc_packed[head_idx].flags = head_flags; 2044 async->last_buffer_idx_packed = from; 2045 } 2046 2047 static __rte_always_inline uint16_t 2048 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, 2049 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2050 uint16_t vchan_id) 2051 { 2052 struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; 2053 struct vhost_async *async = vq->async; 2054 struct async_inflight_info *pkts_info = async->pkts_info; 2055 uint16_t nr_cpl_pkts = 0; 2056 uint16_t n_descs = 0, n_buffers = 0; 2057 uint16_t start_idx, from, i; 2058 2059 /* Check completed copies for the given DMA vChannel */ 2060 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2061 2062 start_idx = async_get_first_inflight_pkt_idx(vq); 2063 /** 2064 * Calculate the number of copy completed packets. 2065 * Note that there may be completed packets even if 2066 * no copies are reported done by the given DMA vChannel, 2067 * as it's possible that a virtqueue uses multiple DMA 2068 * vChannels. 
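	 * pkts_cmpl_flag[] is indexed by packet slot, so completions flagged
	 * by any vChannel serving this virtqueue are picked up here.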
2069 */ 2070 from = start_idx; 2071 while (vq->async->pkts_cmpl_flag[from] && count--) { 2072 vq->async->pkts_cmpl_flag[from] = false; 2073 from++; 2074 if (from >= vq->size) 2075 from -= vq->size; 2076 nr_cpl_pkts++; 2077 } 2078 2079 if (nr_cpl_pkts == 0) 2080 return 0; 2081 2082 for (i = 0; i < nr_cpl_pkts; i++) { 2083 from = (start_idx + i) % vq->size; 2084 /* Only used with packed ring */ 2085 n_buffers += pkts_info[from].nr_buffers; 2086 /* Only used with split ring */ 2087 n_descs += pkts_info[from].descs; 2088 pkts[i] = pkts_info[from].mbuf; 2089 } 2090 2091 async->pkts_inflight_n -= nr_cpl_pkts; 2092 2093 if (likely(vq->enabled && vq->access_ok)) { 2094 if (vq_is_packed(dev)) { 2095 write_back_completed_descs_packed(vq, n_buffers); 2096 vhost_vring_call_packed(dev, vq); 2097 } else { 2098 write_back_completed_descs_split(vq, n_descs); 2099 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE); 2100 vhost_vring_call_split(dev, vq); 2101 } 2102 } else { 2103 if (vq_is_packed(dev)) { 2104 async->last_buffer_idx_packed += n_buffers; 2105 if (async->last_buffer_idx_packed >= vq->size) 2106 async->last_buffer_idx_packed -= vq->size; 2107 } else { 2108 async->last_desc_idx_split += n_descs; 2109 } 2110 } 2111 2112 return nr_cpl_pkts; 2113 } 2114 2115 uint16_t 2116 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2117 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2118 uint16_t vchan_id) 2119 { 2120 struct virtio_net *dev = get_device(vid); 2121 struct vhost_virtqueue *vq; 2122 uint16_t n_pkts_cpl = 0; 2123 2124 if (unlikely(!dev)) 2125 return 0; 2126 2127 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2128 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2129 VHOST_LOG_DATA(dev->ifname, ERR, 2130 "%s: invalid virtqueue idx %d.\n", 2131 __func__, queue_id); 2132 return 0; 2133 } 2134 2135 if (unlikely(!dma_copy_track[dma_id].vchans || 2136 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2137 VHOST_LOG_DATA(dev->ifname, ERR, 2138 "%s: invalid channel %d:%u.\n", 2139 __func__, dma_id, vchan_id); 2140 return 0; 2141 } 2142 2143 vq = dev->virtqueue[queue_id]; 2144 2145 if (!rte_spinlock_trylock(&vq->access_lock)) { 2146 VHOST_LOG_DATA(dev->ifname, DEBUG, 2147 "%s: virtqueue %u is busy.\n", 2148 __func__, queue_id); 2149 return 0; 2150 } 2151 2152 if (unlikely(!vq->async)) { 2153 VHOST_LOG_DATA(dev->ifname, ERR, 2154 "%s: async not registered for virtqueue %d.\n", 2155 __func__, queue_id); 2156 goto out; 2157 } 2158 2159 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id); 2160 2161 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2162 vq->stats.inflight_completed += n_pkts_cpl; 2163 2164 out: 2165 rte_spinlock_unlock(&vq->access_lock); 2166 2167 return n_pkts_cpl; 2168 } 2169 2170 uint16_t 2171 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2172 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2173 uint16_t vchan_id) 2174 { 2175 struct virtio_net *dev = get_device(vid); 2176 struct vhost_virtqueue *vq; 2177 uint16_t n_pkts_cpl = 0; 2178 2179 if (!dev) 2180 return 0; 2181 2182 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2183 if (unlikely(queue_id >= dev->nr_vring)) { 2184 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 2185 __func__, queue_id); 2186 return 0; 2187 } 2188 2189 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2190 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2191 __func__, dma_id); 
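		/*
		 * Descriptive note: dma_id indexes dma_copy_track[], which is sized
		 * RTE_DMADEV_DEFAULT_MAX, so out-of-range values are rejected here
		 * before dma_copy_track[dma_id] is dereferenced below.
		 */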
2192 return 0; 2193 } 2194 2195 vq = dev->virtqueue[queue_id]; 2196 2197 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { 2198 VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access lock taken.\n", 2199 __func__); 2200 return -1; 2201 } 2202 2203 if (unlikely(!vq->async)) { 2204 VHOST_LOG_DATA(dev->ifname, ERR, 2205 "%s: async not registered for virtqueue %d.\n", 2206 __func__, queue_id); 2207 return 0; 2208 } 2209 2210 if (unlikely(!dma_copy_track[dma_id].vchans || 2211 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2212 VHOST_LOG_DATA(dev->ifname, ERR, 2213 "%s: invalid channel %d:%u.\n", 2214 __func__, dma_id, vchan_id); 2215 return 0; 2216 } 2217 2218 if ((queue_id & 1) == 0) 2219 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, 2220 pkts, count, dma_id, vchan_id); 2221 else { 2222 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2223 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2224 } 2225 2226 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2227 vq->stats.inflight_completed += n_pkts_cpl; 2228 2229 return n_pkts_cpl; 2230 } 2231 2232 uint16_t 2233 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2234 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2235 { 2236 struct virtio_net *dev = get_device(vid); 2237 struct vhost_virtqueue *vq; 2238 uint16_t n_pkts_cpl = 0; 2239 2240 if (!dev) 2241 return 0; 2242 2243 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2244 if (unlikely(queue_id >= dev->nr_vring)) { 2245 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n", 2246 __func__, queue_id); 2247 return 0; 2248 } 2249 2250 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2251 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2252 __func__, dma_id); 2253 return 0; 2254 } 2255 2256 vq = dev->virtqueue[queue_id]; 2257 2258 if (!rte_spinlock_trylock(&vq->access_lock)) { 2259 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", 2260 __func__, queue_id); 2261 return 0; 2262 } 2263 2264 if (unlikely(!vq->async)) { 2265 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n", 2266 __func__, queue_id); 2267 goto out_access_unlock; 2268 } 2269 2270 if (unlikely(!dma_copy_track[dma_id].vchans || 2271 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2272 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 2273 __func__, dma_id, vchan_id); 2274 goto out_access_unlock; 2275 } 2276 2277 if ((queue_id & 1) == 0) 2278 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, 2279 pkts, count, dma_id, vchan_id); 2280 else { 2281 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2282 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2283 } 2284 2285 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2286 vq->stats.inflight_completed += n_pkts_cpl; 2287 2288 out_access_unlock: 2289 rte_spinlock_unlock(&vq->access_lock); 2290 2291 return n_pkts_cpl; 2292 } 2293 2294 static __rte_always_inline uint32_t 2295 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2296 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2297 { 2298 struct vhost_virtqueue *vq; 2299 uint32_t nb_tx = 0; 2300 2301 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2302 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2303 VHOST_LOG_DATA(dev->ifname, ERR, 2304 "%s: invalid virtqueue idx %d.\n", 2305 __func__, queue_id); 2306 return 0; 2307 
} 2308 2309 if (unlikely(!dma_copy_track[dma_id].vchans || 2310 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2311 VHOST_LOG_DATA(dev->ifname, ERR, 2312 "%s: invalid channel %d:%u.\n", 2313 __func__, dma_id, vchan_id); 2314 return 0; 2315 } 2316 2317 vq = dev->virtqueue[queue_id]; 2318 2319 rte_spinlock_lock(&vq->access_lock); 2320 2321 if (unlikely(!vq->enabled || !vq->async)) 2322 goto out_access_unlock; 2323 2324 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2325 vhost_user_iotlb_rd_lock(vq); 2326 2327 if (unlikely(!vq->access_ok)) 2328 if (unlikely(vring_translate(dev, vq) < 0)) 2329 goto out; 2330 2331 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2332 if (count == 0) 2333 goto out; 2334 2335 if (vq_is_packed(dev)) 2336 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, 2337 pkts, count, dma_id, vchan_id); 2338 else 2339 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, 2340 pkts, count, dma_id, vchan_id); 2341 2342 vq->stats.inflight_submitted += nb_tx; 2343 2344 out: 2345 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2346 vhost_user_iotlb_rd_unlock(vq); 2347 2348 out_access_unlock: 2349 rte_spinlock_unlock(&vq->access_lock); 2350 2351 return nb_tx; 2352 } 2353 2354 uint16_t 2355 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2356 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2357 uint16_t vchan_id) 2358 { 2359 struct virtio_net *dev = get_device(vid); 2360 2361 if (!dev) 2362 return 0; 2363 2364 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2365 VHOST_LOG_DATA(dev->ifname, ERR, 2366 "%s: built-in vhost net backend is disabled.\n", 2367 __func__); 2368 return 0; 2369 } 2370 2371 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id); 2372 } 2373 2374 static inline bool 2375 virtio_net_with_host_offload(struct virtio_net *dev) 2376 { 2377 if (dev->features & 2378 ((1ULL << VIRTIO_NET_F_CSUM) | 2379 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2380 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2381 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2382 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2383 return true; 2384 2385 return false; 2386 } 2387 2388 static int 2389 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2390 { 2391 struct rte_ipv4_hdr *ipv4_hdr; 2392 struct rte_ipv6_hdr *ipv6_hdr; 2393 struct rte_ether_hdr *eth_hdr; 2394 uint16_t ethertype; 2395 uint16_t data_len = rte_pktmbuf_data_len(m); 2396 2397 if (data_len < sizeof(struct rte_ether_hdr)) 2398 return -EINVAL; 2399 2400 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2401 2402 m->l2_len = sizeof(struct rte_ether_hdr); 2403 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2404 2405 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2406 if (data_len < sizeof(struct rte_ether_hdr) + 2407 sizeof(struct rte_vlan_hdr)) 2408 goto error; 2409 2410 struct rte_vlan_hdr *vlan_hdr = 2411 (struct rte_vlan_hdr *)(eth_hdr + 1); 2412 2413 m->l2_len += sizeof(struct rte_vlan_hdr); 2414 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2415 } 2416 2417 switch (ethertype) { 2418 case RTE_ETHER_TYPE_IPV4: 2419 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2420 goto error; 2421 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2422 m->l2_len); 2423 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2424 if (data_len < m->l2_len + m->l3_len) 2425 goto error; 2426 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2427 *l4_proto = ipv4_hdr->next_proto_id; 2428 break; 2429 case RTE_ETHER_TYPE_IPV6: 2430 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2431 goto 
error; 2432 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2433 m->l2_len); 2434 m->l3_len = sizeof(struct rte_ipv6_hdr); 2435 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2436 *l4_proto = ipv6_hdr->proto; 2437 break; 2438 default: 2439 /* a valid L3 header is needed for further L4 parsing */ 2440 goto error; 2441 } 2442 2443 /* both CSUM and GSO need a valid L4 header */ 2444 switch (*l4_proto) { 2445 case IPPROTO_TCP: 2446 if (data_len < m->l2_len + m->l3_len + 2447 sizeof(struct rte_tcp_hdr)) 2448 goto error; 2449 break; 2450 case IPPROTO_UDP: 2451 if (data_len < m->l2_len + m->l3_len + 2452 sizeof(struct rte_udp_hdr)) 2453 goto error; 2454 break; 2455 case IPPROTO_SCTP: 2456 if (data_len < m->l2_len + m->l3_len + 2457 sizeof(struct rte_sctp_hdr)) 2458 goto error; 2459 break; 2460 default: 2461 goto error; 2462 } 2463 2464 return 0; 2465 2466 error: 2467 m->l2_len = 0; 2468 m->l3_len = 0; 2469 m->ol_flags = 0; 2470 return -EINVAL; 2471 } 2472 2473 static __rte_always_inline void 2474 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2475 struct rte_mbuf *m) 2476 { 2477 uint8_t l4_proto = 0; 2478 struct rte_tcp_hdr *tcp_hdr = NULL; 2479 uint16_t tcp_len; 2480 uint16_t data_len = rte_pktmbuf_data_len(m); 2481 2482 if (parse_headers(m, &l4_proto) < 0) 2483 return; 2484 2485 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2486 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2487 switch (hdr->csum_offset) { 2488 case (offsetof(struct rte_tcp_hdr, cksum)): 2489 if (l4_proto != IPPROTO_TCP) 2490 goto error; 2491 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2492 break; 2493 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2494 if (l4_proto != IPPROTO_UDP) 2495 goto error; 2496 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2497 break; 2498 case (offsetof(struct rte_sctp_hdr, cksum)): 2499 if (l4_proto != IPPROTO_SCTP) 2500 goto error; 2501 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2502 break; 2503 default: 2504 goto error; 2505 } 2506 } else { 2507 goto error; 2508 } 2509 } 2510 2511 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2512 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2513 case VIRTIO_NET_HDR_GSO_TCPV4: 2514 case VIRTIO_NET_HDR_GSO_TCPV6: 2515 if (l4_proto != IPPROTO_TCP) 2516 goto error; 2517 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2518 struct rte_tcp_hdr *, 2519 m->l2_len + m->l3_len); 2520 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2521 if (data_len < m->l2_len + m->l3_len + tcp_len) 2522 goto error; 2523 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2524 m->tso_segsz = hdr->gso_size; 2525 m->l4_len = tcp_len; 2526 break; 2527 case VIRTIO_NET_HDR_GSO_UDP: 2528 if (l4_proto != IPPROTO_UDP) 2529 goto error; 2530 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2531 m->tso_segsz = hdr->gso_size; 2532 m->l4_len = sizeof(struct rte_udp_hdr); 2533 break; 2534 default: 2535 VHOST_LOG_DATA(dev->ifname, WARNING, 2536 "unsupported gso type %u.\n", 2537 hdr->gso_type); 2538 goto error; 2539 } 2540 } 2541 return; 2542 2543 error: 2544 m->l2_len = 0; 2545 m->l3_len = 0; 2546 m->ol_flags = 0; 2547 } 2548 2549 static __rte_always_inline void 2550 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2551 struct rte_mbuf *m, bool legacy_ol_flags) 2552 { 2553 struct rte_net_hdr_lens hdr_lens; 2554 int l4_supported = 0; 2555 uint32_t ptype; 2556 2557 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2558 return; 2559 2560 if (legacy_ol_flags) { 2561 vhost_dequeue_offload_legacy(dev, hdr, m); 2562 return; 2563 } 2564 2565 m->ol_flags |= 
RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2566 2567 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2568 m->packet_type = ptype; 2569 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2570 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2571 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2572 l4_supported = 1; 2573 2574 /* According to Virtio 1.1 spec, the device only needs to look at 2575 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2576 * This differs from the processing incoming packets path where the 2577 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2578 * device. 2579 * 2580 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2581 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2582 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2583 * 2584 * 5.1.6.2.2 Device Requirements: Packet Transmission 2585 * The device MUST ignore flag bits that it does not recognize. 2586 */ 2587 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2588 uint32_t hdrlen; 2589 2590 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2591 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2592 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2593 } else { 2594 /* Unknown proto or tunnel, do sw cksum. We can assume 2595 * the cksum field is in the first segment since the 2596 * buffers we provided to the host are large enough. 2597 * In case of SCTP, this will be wrong since it's a CRC 2598 * but there's nothing we can do. 2599 */ 2600 uint16_t csum = 0, off; 2601 2602 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2603 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2604 return; 2605 if (likely(csum != 0xffff)) 2606 csum = ~csum; 2607 off = hdr->csum_offset + hdr->csum_start; 2608 if (rte_pktmbuf_data_len(m) >= off + 1) 2609 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2610 } 2611 } 2612 2613 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2614 if (hdr->gso_size == 0) 2615 return; 2616 2617 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2618 case VIRTIO_NET_HDR_GSO_TCPV4: 2619 case VIRTIO_NET_HDR_GSO_TCPV6: 2620 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2621 break; 2622 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2623 m->tso_segsz = hdr->gso_size; 2624 break; 2625 case VIRTIO_NET_HDR_GSO_UDP: 2626 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2627 break; 2628 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2629 m->tso_segsz = hdr->gso_size; 2630 break; 2631 default: 2632 break; 2633 } 2634 } 2635 } 2636 2637 static __rte_noinline void 2638 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2639 struct buf_vector *buf_vec) 2640 { 2641 uint64_t len; 2642 uint64_t remain = sizeof(struct virtio_net_hdr); 2643 uint64_t src; 2644 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2645 2646 while (remain) { 2647 len = RTE_MIN(remain, buf_vec->buf_len); 2648 src = buf_vec->buf_addr; 2649 rte_memcpy((void *)(uintptr_t)dst, 2650 (void *)(uintptr_t)src, len); 2651 2652 remain -= len; 2653 dst += len; 2654 buf_vec++; 2655 } 2656 } 2657 2658 static __rte_always_inline int 2659 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2660 struct buf_vector *buf_vec, uint16_t nr_vec, 2661 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2662 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2663 { 2664 uint32_t buf_avail, buf_offset, buf_len; 2665 uint64_t buf_addr, buf_iova; 2666 uint32_t mbuf_avail, mbuf_offset; 2667 uint32_t cpy_len; 2668 struct rte_mbuf *cur = m, *prev = m; 2669 
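	/*
	 * tmp_hdr below is only used when the virtio-net header is not
	 * contiguous in guest memory: copy_vnet_hdr_from_desc() gathers it
	 * from several descriptor buffers into this stack copy, and "hdr"
	 * then points either at tmp_hdr or directly into the first buffer.
	 */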
struct virtio_net_hdr tmp_hdr; 2670 struct virtio_net_hdr *hdr = NULL; 2671 /* A counter to avoid desc dead loop chain */ 2672 uint16_t vec_idx = 0; 2673 struct vhost_async *async = vq->async; 2674 struct async_inflight_info *pkts_info; 2675 2676 buf_addr = buf_vec[vec_idx].buf_addr; 2677 buf_iova = buf_vec[vec_idx].buf_iova; 2678 buf_len = buf_vec[vec_idx].buf_len; 2679 2680 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 2681 return -1; 2682 2683 if (virtio_net_with_host_offload(dev)) { 2684 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2685 /* 2686 * No luck, the virtio-net header doesn't fit 2687 * in a contiguous virtual area. 2688 */ 2689 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2690 hdr = &tmp_hdr; 2691 } else { 2692 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); 2693 } 2694 } 2695 2696 /* 2697 * A virtio driver normally uses at least 2 desc buffers 2698 * for Tx: the first for storing the header, and others 2699 * for storing the data. 2700 */ 2701 if (unlikely(buf_len < dev->vhost_hlen)) { 2702 buf_offset = dev->vhost_hlen - buf_len; 2703 vec_idx++; 2704 buf_addr = buf_vec[vec_idx].buf_addr; 2705 buf_iova = buf_vec[vec_idx].buf_iova; 2706 buf_len = buf_vec[vec_idx].buf_len; 2707 buf_avail = buf_len - buf_offset; 2708 } else if (buf_len == dev->vhost_hlen) { 2709 if (unlikely(++vec_idx >= nr_vec)) 2710 goto error; 2711 buf_addr = buf_vec[vec_idx].buf_addr; 2712 buf_iova = buf_vec[vec_idx].buf_iova; 2713 buf_len = buf_vec[vec_idx].buf_len; 2714 2715 buf_offset = 0; 2716 buf_avail = buf_len; 2717 } else { 2718 buf_offset = dev->vhost_hlen; 2719 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; 2720 } 2721 2722 PRINT_PACKET(dev, 2723 (uintptr_t)(buf_addr + buf_offset), 2724 (uint32_t)buf_avail, 0); 2725 2726 mbuf_offset = 0; 2727 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2728 2729 if (is_async) { 2730 pkts_info = async->pkts_info; 2731 if (async_iter_initialize(dev, async)) 2732 return -1; 2733 } 2734 2735 while (1) { 2736 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2737 2738 if (is_async) { 2739 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2740 buf_iova + buf_offset, cpy_len, false) < 0) 2741 goto error; 2742 } else if (likely(hdr && cur == m)) { 2743 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), 2744 (void *)((uintptr_t)(buf_addr + buf_offset)), 2745 cpy_len); 2746 } else { 2747 sync_fill_seg(dev, vq, cur, mbuf_offset, 2748 buf_addr + buf_offset, 2749 buf_iova + buf_offset, cpy_len, false); 2750 } 2751 2752 mbuf_avail -= cpy_len; 2753 mbuf_offset += cpy_len; 2754 buf_avail -= cpy_len; 2755 buf_offset += cpy_len; 2756 2757 /* This buf reaches to its end, get the next one */ 2758 if (buf_avail == 0) { 2759 if (++vec_idx >= nr_vec) 2760 break; 2761 2762 buf_addr = buf_vec[vec_idx].buf_addr; 2763 buf_iova = buf_vec[vec_idx].buf_iova; 2764 buf_len = buf_vec[vec_idx].buf_len; 2765 2766 buf_offset = 0; 2767 buf_avail = buf_len; 2768 2769 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2770 (uint32_t)buf_avail, 0); 2771 } 2772 2773 /* 2774 * This mbuf reaches to its end, get a new one 2775 * to hold more data. 
2776 */ 2777 if (mbuf_avail == 0) { 2778 cur = rte_pktmbuf_alloc(mbuf_pool); 2779 if (unlikely(cur == NULL)) { 2780 VHOST_LOG_DATA(dev->ifname, ERR, 2781 "failed to allocate memory for mbuf.\n"); 2782 goto error; 2783 } 2784 2785 prev->next = cur; 2786 prev->data_len = mbuf_offset; 2787 m->nb_segs += 1; 2788 m->pkt_len += mbuf_offset; 2789 prev = cur; 2790 2791 mbuf_offset = 0; 2792 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2793 } 2794 } 2795 2796 prev->data_len = mbuf_offset; 2797 m->pkt_len += mbuf_offset; 2798 2799 if (is_async) { 2800 async_iter_finalize(async); 2801 if (hdr) 2802 pkts_info[slot_idx].nethdr = *hdr; 2803 } else if (hdr) { 2804 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 2805 } 2806 2807 return 0; 2808 error: 2809 if (is_async) 2810 async_iter_cancel(async); 2811 2812 return -1; 2813 } 2814 2815 static void 2816 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2817 { 2818 rte_free(opaque); 2819 } 2820 2821 static int 2822 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 2823 { 2824 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2825 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2826 uint16_t buf_len; 2827 rte_iova_t iova; 2828 void *buf; 2829 2830 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2831 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2832 2833 if (unlikely(total_len > UINT16_MAX)) 2834 return -ENOSPC; 2835 2836 buf_len = total_len; 2837 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2838 if (unlikely(buf == NULL)) 2839 return -ENOMEM; 2840 2841 /* Initialize shinfo */ 2842 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2843 virtio_dev_extbuf_free, buf); 2844 if (unlikely(shinfo == NULL)) { 2845 rte_free(buf); 2846 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n"); 2847 return -1; 2848 } 2849 2850 iova = rte_malloc_virt2iova(buf); 2851 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2852 rte_pktmbuf_reset_headroom(pkt); 2853 2854 return 0; 2855 } 2856 2857 /* 2858 * Prepare a host supported pktmbuf. 2859 */ 2860 static __rte_always_inline int 2861 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2862 uint32_t data_len) 2863 { 2864 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2865 return 0; 2866 2867 /* attach an external buffer if supported */ 2868 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 2869 return 0; 2870 2871 /* check if chained buffers are allowed */ 2872 if (!dev->linearbuf) 2873 return 0; 2874 2875 return -1; 2876 } 2877 2878 __rte_always_inline 2879 static uint16_t 2880 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2881 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2882 bool legacy_ol_flags) 2883 { 2884 uint16_t i; 2885 uint16_t avail_entries; 2886 uint16_t dropped = 0; 2887 static bool allocerr_warned; 2888 2889 /* 2890 * The ordering between avail index and 2891 * desc reads needs to be enforced. 
2892 */ 2893 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2894 vq->last_avail_idx; 2895 if (avail_entries == 0) 2896 return 0; 2897 2898 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2899 2900 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2901 2902 count = RTE_MIN(count, MAX_PKT_BURST); 2903 count = RTE_MIN(count, avail_entries); 2904 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 2905 2906 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2907 return 0; 2908 2909 for (i = 0; i < count; i++) { 2910 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2911 uint16_t head_idx; 2912 uint32_t buf_len; 2913 uint16_t nr_vec = 0; 2914 int err; 2915 2916 if (unlikely(fill_vec_buf_split(dev, vq, 2917 vq->last_avail_idx + i, 2918 &nr_vec, buf_vec, 2919 &head_idx, &buf_len, 2920 VHOST_ACCESS_RO) < 0)) 2921 break; 2922 2923 update_shadow_used_ring_split(vq, head_idx, 0); 2924 2925 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2926 if (unlikely(err)) { 2927 /* 2928 * mbuf allocation fails for jumbo packets when external 2929 * buffer allocation is not allowed and linear buffer 2930 * is required. Drop this packet. 2931 */ 2932 if (!allocerr_warned) { 2933 VHOST_LOG_DATA(dev->ifname, ERR, 2934 "failed mbuf alloc of size %d from %s.\n", 2935 buf_len, mbuf_pool->name); 2936 allocerr_warned = true; 2937 } 2938 dropped += 1; 2939 i++; 2940 break; 2941 } 2942 2943 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2944 mbuf_pool, legacy_ol_flags, 0, false); 2945 if (unlikely(err)) { 2946 if (!allocerr_warned) { 2947 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 2948 allocerr_warned = true; 2949 } 2950 dropped += 1; 2951 i++; 2952 break; 2953 } 2954 2955 } 2956 2957 if (dropped) 2958 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2959 2960 vq->last_avail_idx += i; 2961 2962 do_data_copy_dequeue(vq); 2963 if (unlikely(i < count)) 2964 vq->shadow_used_idx = i; 2965 if (likely(vq->shadow_used_idx)) { 2966 flush_shadow_used_ring_split(dev, vq); 2967 vhost_vring_call_split(dev, vq); 2968 } 2969 2970 return (i - dropped); 2971 } 2972 2973 __rte_noinline 2974 static uint16_t 2975 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2976 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2977 struct rte_mbuf **pkts, uint16_t count) 2978 { 2979 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2980 } 2981 2982 __rte_noinline 2983 static uint16_t 2984 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2985 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2986 struct rte_mbuf **pkts, uint16_t count) 2987 { 2988 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2989 } 2990 2991 static __rte_always_inline int 2992 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2993 struct vhost_virtqueue *vq, 2994 struct rte_mbuf **pkts, 2995 uint16_t avail_idx, 2996 uintptr_t *desc_addrs, 2997 uint16_t *ids) 2998 { 2999 bool wrap = vq->avail_wrap_counter; 3000 struct vring_packed_desc *descs = vq->desc_packed; 3001 uint64_t lens[PACKED_BATCH_SIZE]; 3002 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3003 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3004 uint16_t flags, i; 3005 3006 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3007 return -1; 3008 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3009 return -1; 3010 3011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3012 flags = descs[avail_idx + i].flags; 3013 if (unlikely((wrap != 
!!(flags & VRING_DESC_F_AVAIL)) || 3014 (wrap == !!(flags & VRING_DESC_F_USED)) || 3015 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3016 return -1; 3017 } 3018 3019 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3020 3021 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3022 lens[i] = descs[avail_idx + i].len; 3023 3024 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3025 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3026 descs[avail_idx + i].addr, 3027 &lens[i], VHOST_ACCESS_RW); 3028 } 3029 3030 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3031 if (unlikely(!desc_addrs[i])) 3032 return -1; 3033 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3034 return -1; 3035 } 3036 3037 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3038 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3039 goto err; 3040 } 3041 3042 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3043 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3044 3045 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3046 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3047 goto err; 3048 } 3049 3050 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3051 pkts[i]->pkt_len = lens[i] - buf_offset; 3052 pkts[i]->data_len = pkts[i]->pkt_len; 3053 ids[i] = descs[avail_idx + i].id; 3054 } 3055 3056 return 0; 3057 3058 err: 3059 return -1; 3060 } 3061 3062 static __rte_always_inline int 3063 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3064 struct vhost_virtqueue *vq, 3065 struct rte_mbuf **pkts, 3066 bool legacy_ol_flags) 3067 { 3068 uint16_t avail_idx = vq->last_avail_idx; 3069 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3070 struct virtio_net_hdr *hdr; 3071 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3072 uint16_t ids[PACKED_BATCH_SIZE]; 3073 uint16_t i; 3074 3075 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3076 desc_addrs, ids)) 3077 return -1; 3078 3079 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3080 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3081 3082 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3083 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3084 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3085 pkts[i]->pkt_len); 3086 3087 if (virtio_net_with_host_offload(dev)) { 3088 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3089 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3090 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3091 } 3092 } 3093 3094 if (virtio_net_is_inorder(dev)) 3095 vhost_shadow_dequeue_batch_packed_inorder(vq, 3096 ids[PACKED_BATCH_SIZE - 1]); 3097 else 3098 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3099 3100 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3101 3102 return 0; 3103 } 3104 3105 static __rte_always_inline int 3106 vhost_dequeue_single_packed(struct virtio_net *dev, 3107 struct vhost_virtqueue *vq, 3108 struct rte_mempool *mbuf_pool, 3109 struct rte_mbuf *pkts, 3110 uint16_t *buf_id, 3111 uint16_t *desc_count, 3112 bool legacy_ol_flags) 3113 { 3114 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3115 uint32_t buf_len; 3116 uint16_t nr_vec = 0; 3117 int err; 3118 static bool allocerr_warned; 3119 3120 if (unlikely(fill_vec_buf_packed(dev, vq, 3121 vq->last_avail_idx, desc_count, 3122 buf_vec, &nr_vec, 3123 buf_id, &buf_len, 3124 VHOST_ACCESS_RO) < 0)) 3125 return -1; 3126 3127 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3128 if (!allocerr_warned) { 3129 VHOST_LOG_DATA(dev->ifname, ERR, 3130 "failed mbuf alloc of size %d from %s.\n", 3131 buf_len, mbuf_pool->name); 
3132 allocerr_warned = true; 3133 } 3134 return -1; 3135 } 3136 3137 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3138 mbuf_pool, legacy_ol_flags, 0, false); 3139 if (unlikely(err)) { 3140 if (!allocerr_warned) { 3141 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3142 allocerr_warned = true; 3143 } 3144 return -1; 3145 } 3146 3147 return 0; 3148 } 3149 3150 static __rte_always_inline int 3151 virtio_dev_tx_single_packed(struct virtio_net *dev, 3152 struct vhost_virtqueue *vq, 3153 struct rte_mempool *mbuf_pool, 3154 struct rte_mbuf *pkts, 3155 bool legacy_ol_flags) 3156 { 3157 3158 uint16_t buf_id, desc_count = 0; 3159 int ret; 3160 3161 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3162 &desc_count, legacy_ol_flags); 3163 3164 if (likely(desc_count > 0)) { 3165 if (virtio_net_is_inorder(dev)) 3166 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3167 desc_count); 3168 else 3169 vhost_shadow_dequeue_single_packed(vq, buf_id, 3170 desc_count); 3171 3172 vq_inc_last_avail_packed(vq, desc_count); 3173 } 3174 3175 return ret; 3176 } 3177 3178 __rte_always_inline 3179 static uint16_t 3180 virtio_dev_tx_packed(struct virtio_net *dev, 3181 struct vhost_virtqueue *__rte_restrict vq, 3182 struct rte_mempool *mbuf_pool, 3183 struct rte_mbuf **__rte_restrict pkts, 3184 uint32_t count, 3185 bool legacy_ol_flags) 3186 { 3187 uint32_t pkt_idx = 0; 3188 3189 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3190 return 0; 3191 3192 do { 3193 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3194 3195 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3196 if (!virtio_dev_tx_batch_packed(dev, vq, 3197 &pkts[pkt_idx], 3198 legacy_ol_flags)) { 3199 pkt_idx += PACKED_BATCH_SIZE; 3200 continue; 3201 } 3202 } 3203 3204 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3205 pkts[pkt_idx], 3206 legacy_ol_flags)) 3207 break; 3208 pkt_idx++; 3209 } while (pkt_idx < count); 3210 3211 if (pkt_idx != count) 3212 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3213 3214 if (vq->shadow_used_idx) { 3215 do_data_copy_dequeue(vq); 3216 3217 vhost_flush_dequeue_shadow_packed(dev, vq); 3218 vhost_vring_call_packed(dev, vq); 3219 } 3220 3221 return pkt_idx; 3222 } 3223 3224 __rte_noinline 3225 static uint16_t 3226 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3227 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3228 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3229 { 3230 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3231 } 3232 3233 __rte_noinline 3234 static uint16_t 3235 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3236 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3237 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3238 { 3239 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3240 } 3241 3242 uint16_t 3243 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3244 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3245 { 3246 struct virtio_net *dev; 3247 struct rte_mbuf *rarp_mbuf = NULL; 3248 struct vhost_virtqueue *vq; 3249 int16_t success = 1; 3250 3251 dev = get_device(vid); 3252 if (!dev) 3253 return 0; 3254 3255 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3256 VHOST_LOG_DATA(dev->ifname, ERR, 3257 "%s: built-in vhost net backend is disabled.\n", 3258 __func__); 3259 return 0; 3260 } 3261 3262 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3263 
		VHOST_LOG_DATA(dev->ifname, ERR,
			"%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(!vq->enabled)) {
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n performs a write as part of the
	 * compare-and-exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it to the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
3317 */ 3318 pkts[0] = rarp_mbuf; 3319 vhost_queue_stats_update(dev, vq, pkts, 1); 3320 pkts++; 3321 count -= 1; 3322 } 3323 3324 if (vq_is_packed(dev)) { 3325 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3326 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3327 else 3328 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3329 } else { 3330 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3331 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3332 else 3333 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3334 } 3335 3336 vhost_queue_stats_update(dev, vq, pkts, count); 3337 3338 out: 3339 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3340 vhost_user_iotlb_rd_unlock(vq); 3341 3342 out_access_unlock: 3343 rte_spinlock_unlock(&vq->access_lock); 3344 3345 if (unlikely(rarp_mbuf != NULL)) 3346 count += 1; 3347 3348 return count; 3349 } 3350 3351 static __rte_always_inline uint16_t 3352 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3353 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3354 uint16_t vchan_id, bool legacy_ol_flags) 3355 { 3356 uint16_t start_idx, from, i; 3357 uint16_t nr_cpl_pkts = 0; 3358 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3359 3360 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3361 3362 start_idx = async_get_first_inflight_pkt_idx(vq); 3363 3364 from = start_idx; 3365 while (vq->async->pkts_cmpl_flag[from] && count--) { 3366 vq->async->pkts_cmpl_flag[from] = false; 3367 from = (from + 1) % vq->size; 3368 nr_cpl_pkts++; 3369 } 3370 3371 if (nr_cpl_pkts == 0) 3372 return 0; 3373 3374 for (i = 0; i < nr_cpl_pkts; i++) { 3375 from = (start_idx + i) % vq->size; 3376 pkts[i] = pkts_info[from].mbuf; 3377 3378 if (virtio_net_with_host_offload(dev)) 3379 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i], 3380 legacy_ol_flags); 3381 } 3382 3383 /* write back completed descs to used ring and update used idx */ 3384 if (vq_is_packed(dev)) { 3385 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3386 vhost_vring_call_packed(dev, vq); 3387 } else { 3388 write_back_completed_descs_split(vq, nr_cpl_pkts); 3389 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE); 3390 vhost_vring_call_split(dev, vq); 3391 } 3392 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3393 3394 return nr_cpl_pkts; 3395 } 3396 3397 static __rte_always_inline uint16_t 3398 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3399 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3400 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3401 { 3402 static bool allocerr_warned; 3403 bool dropped = false; 3404 uint16_t avail_entries; 3405 uint16_t pkt_idx, slot_idx = 0; 3406 uint16_t nr_done_pkts = 0; 3407 uint16_t pkt_err = 0; 3408 uint16_t n_xfer; 3409 struct vhost_async *async = vq->async; 3410 struct async_inflight_info *pkts_info = async->pkts_info; 3411 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3412 uint16_t pkts_size = count; 3413 3414 /** 3415 * The ordering between avail index and 3416 * desc reads needs to be enforced. 
3417 */ 3418 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3419 vq->last_avail_idx; 3420 if (avail_entries == 0) 3421 goto out; 3422 3423 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3424 3425 async_iter_reset(async); 3426 3427 count = RTE_MIN(count, MAX_PKT_BURST); 3428 count = RTE_MIN(count, avail_entries); 3429 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3430 3431 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3432 goto out; 3433 3434 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3435 uint16_t head_idx = 0; 3436 uint16_t nr_vec = 0; 3437 uint16_t to; 3438 uint32_t buf_len; 3439 int err; 3440 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3441 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3442 3443 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3444 &nr_vec, buf_vec, 3445 &head_idx, &buf_len, 3446 VHOST_ACCESS_RO) < 0)) { 3447 dropped = true; 3448 break; 3449 } 3450 3451 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3452 if (unlikely(err)) { 3453 /** 3454 * mbuf allocation fails for jumbo packets when external 3455 * buffer allocation is not allowed and linear buffer 3456 * is required. Drop this packet. 3457 */ 3458 if (!allocerr_warned) { 3459 VHOST_LOG_DATA(dev->ifname, ERR, 3460 "%s: Failed mbuf alloc of size %d from %s\n", 3461 __func__, buf_len, mbuf_pool->name); 3462 allocerr_warned = true; 3463 } 3464 dropped = true; 3465 break; 3466 } 3467 3468 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3469 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3470 legacy_ol_flags, slot_idx, true); 3471 if (unlikely(err)) { 3472 if (!allocerr_warned) { 3473 VHOST_LOG_DATA(dev->ifname, ERR, 3474 "%s: Failed to offload copies to async channel.\n", 3475 __func__); 3476 allocerr_warned = true; 3477 } 3478 dropped = true; 3479 break; 3480 } 3481 3482 pkts_info[slot_idx].mbuf = pkt; 3483 3484 /* store used descs */ 3485 to = async->desc_idx_split & (vq->size - 1); 3486 async->descs_split[to].id = head_idx; 3487 async->descs_split[to].len = 0; 3488 async->desc_idx_split++; 3489 3490 vq->last_avail_idx++; 3491 } 3492 3493 if (unlikely(dropped)) 3494 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3495 3496 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3497 async->iov_iter, pkt_idx); 3498 3499 async->pkts_inflight_n += n_xfer; 3500 3501 pkt_err = pkt_idx - n_xfer; 3502 if (unlikely(pkt_err)) { 3503 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n", 3504 __func__); 3505 3506 pkt_idx = n_xfer; 3507 /* recover available ring */ 3508 vq->last_avail_idx -= pkt_err; 3509 3510 /** 3511 * recover async channel copy related structures and free pktmbufs 3512 * for error pkts. 3513 */ 3514 async->desc_idx_split -= pkt_err; 3515 while (pkt_err-- > 0) { 3516 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3517 slot_idx--; 3518 } 3519 } 3520 3521 async->pkts_idx += pkt_idx; 3522 if (async->pkts_idx >= vq->size) 3523 async->pkts_idx -= vq->size; 3524 3525 out: 3526 /* DMA device may serve other queues, unconditionally check completed. 
*/ 3527 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3528 dma_id, vchan_id, legacy_ol_flags); 3529 3530 return nr_done_pkts; 3531 } 3532 3533 __rte_noinline 3534 static uint16_t 3535 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3536 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3537 struct rte_mbuf **pkts, uint16_t count, 3538 int16_t dma_id, uint16_t vchan_id) 3539 { 3540 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3541 pkts, count, dma_id, vchan_id, true); 3542 } 3543 3544 __rte_noinline 3545 static uint16_t 3546 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3547 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3548 struct rte_mbuf **pkts, uint16_t count, 3549 int16_t dma_id, uint16_t vchan_id) 3550 { 3551 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3552 pkts, count, dma_id, vchan_id, false); 3553 } 3554 3555 static __rte_always_inline void 3556 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t buf_id) 3557 { 3558 struct vhost_async *async = vq->async; 3559 uint16_t idx = async->buffer_idx_packed; 3560 3561 async->buffers_packed[idx].id = buf_id; 3562 async->buffers_packed[idx].len = 0; 3563 async->buffers_packed[idx].count = 1; 3564 3565 async->buffer_idx_packed++; 3566 if (async->buffer_idx_packed >= vq->size) 3567 async->buffer_idx_packed -= vq->size; 3568 3569 } 3570 3571 static __rte_always_inline int 3572 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3573 struct vhost_virtqueue *vq, 3574 struct rte_mempool *mbuf_pool, 3575 struct rte_mbuf *pkts, 3576 uint16_t slot_idx, 3577 bool legacy_ol_flags) 3578 { 3579 int err; 3580 uint16_t buf_id, desc_count = 0; 3581 uint16_t nr_vec = 0; 3582 uint32_t buf_len; 3583 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3584 static bool allocerr_warned; 3585 3586 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3587 buf_vec, &nr_vec, &buf_id, &buf_len, 3588 VHOST_ACCESS_RO) < 0)) 3589 return -1; 3590 3591 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3592 if (!allocerr_warned) { 3593 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n", 3594 buf_len, mbuf_pool->name); 3595 3596 allocerr_warned = true; 3597 } 3598 return -1; 3599 } 3600 3601 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3602 legacy_ol_flags, slot_idx, true); 3603 if (unlikely(err)) { 3604 rte_pktmbuf_free(pkts); 3605 if (!allocerr_warned) { 3606 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf on.\n"); 3607 allocerr_warned = true; 3608 } 3609 return -1; 3610 } 3611 3612 /* update async shadow packed ring */ 3613 vhost_async_shadow_dequeue_single_packed(vq, buf_id); 3614 3615 return err; 3616 } 3617 3618 static __rte_always_inline uint16_t 3619 virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3620 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3621 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3622 { 3623 uint16_t pkt_idx; 3624 uint16_t slot_idx = 0; 3625 uint16_t nr_done_pkts = 0; 3626 uint16_t pkt_err = 0; 3627 uint32_t n_xfer; 3628 struct vhost_async *async = vq->async; 3629 struct async_inflight_info *pkts_info = async->pkts_info; 3630 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3631 3632 VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); 3633 3634 async_iter_reset(async); 3635 3636 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3637 goto out; 
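	/*
	 * For each available descriptor chain: build the DMA copy job via
	 * desc_to_mbuf() in async mode, remember the mbuf in
	 * pkts_info[slot_idx] so it can be returned to the caller once the
	 * copy completes, and advance the packed-ring avail index.
	 */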
3638 3639 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3640 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3641 3642 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3643 3644 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 3645 if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt, 3646 slot_idx, legacy_ol_flags))) { 3647 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3648 break; 3649 } 3650 3651 pkts_info[slot_idx].mbuf = pkt; 3652 3653 vq_inc_last_avail_packed(vq, 1); 3654 3655 } 3656 3657 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3658 async->iov_iter, pkt_idx); 3659 3660 async->pkts_inflight_n += n_xfer; 3661 3662 pkt_err = pkt_idx - n_xfer; 3663 3664 if (unlikely(pkt_err)) { 3665 pkt_idx -= pkt_err; 3666 3667 /** 3668 * recover DMA-copy related structures and free pktmbuf for DMA-error pkts. 3669 */ 3670 if (async->buffer_idx_packed >= pkt_err) 3671 async->buffer_idx_packed -= pkt_err; 3672 else 3673 async->buffer_idx_packed += vq->size - pkt_err; 3674 3675 while (pkt_err-- > 0) { 3676 rte_pktmbuf_free(pkts_info[slot_idx % vq->size].mbuf); 3677 slot_idx--; 3678 } 3679 3680 /* recover available ring */ 3681 if (vq->last_avail_idx >= pkt_err) { 3682 vq->last_avail_idx -= pkt_err; 3683 } else { 3684 vq->last_avail_idx += vq->size - pkt_err; 3685 vq->avail_wrap_counter ^= 1; 3686 } 3687 } 3688 3689 async->pkts_idx += pkt_idx; 3690 if (async->pkts_idx >= vq->size) 3691 async->pkts_idx -= vq->size; 3692 3693 out: 3694 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count, 3695 dma_id, vchan_id, legacy_ol_flags); 3696 3697 return nr_done_pkts; 3698 } 3699 3700 __rte_noinline 3701 static uint16_t 3702 virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, 3703 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3704 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3705 { 3706 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3707 pkts, count, dma_id, vchan_id, true); 3708 } 3709 3710 __rte_noinline 3711 static uint16_t 3712 virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, 3713 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3714 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3715 { 3716 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3717 pkts, count, dma_id, vchan_id, false); 3718 } 3719 3720 uint16_t 3721 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, 3722 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3723 int *nr_inflight, int16_t dma_id, uint16_t vchan_id) 3724 { 3725 struct virtio_net *dev; 3726 struct rte_mbuf *rarp_mbuf = NULL; 3727 struct vhost_virtqueue *vq; 3728 int16_t success = 1; 3729 3730 dev = get_device(vid); 3731 if (!dev || !nr_inflight) 3732 return 0; 3733 3734 *nr_inflight = -1; 3735 3736 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3737 VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n", 3738 __func__); 3739 return 0; 3740 } 3741 3742 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3743 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 3744 __func__, queue_id); 3745 return 0; 3746 } 3747 3748 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 3749 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 3750 __func__, dma_id); 3751 return 0; 3752 } 3753 3754 if (unlikely(!dma_copy_track[dma_id].vchans || 3755 
			!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
			__func__, dma_id, vchan_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(vq->enabled == 0)) {
		count = 0;
		goto out_access_unlock;
	}

	if (unlikely(!vq->async)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
			__func__, queue_id);
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(vq->access_ok == 0))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n performs a write as part of the
	 * compare-and-exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it to the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		pkts[0] = rarp_mbuf;
		vhost_queue_stats_update(dev, vq, pkts, 1);
		pkts++;
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	}

	*nr_inflight = vq->async->pkts_inflight_n;
	vhost_queue_stats_update(dev, vq, pkts, count);

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL))
		count += 1;

	return count;
}
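
/*
 * Illustrative usage sketch, kept as a comment rather than built code:
 * a minimal polling loop around the async data-path APIs implemented in
 * this file. It assumes the application has already attached the DMA
 * device with rte_vhost_async_dma_configure() and registered the
 * virtqueues with rte_vhost_async_channel_register(); "vid", "dma_id",
 * "vchan_id", "mp" and "nb_pkts" are application placeholders.
 *
 *	struct rte_mbuf *enq[MAX_PKT_BURST], *done[MAX_PKT_BURST];
 *	struct rte_mbuf *deq[MAX_PKT_BURST];
 *	uint16_t n_enq, n_done, n_deq;
 *	int nr_inflight;
 *
 *	// Guest RX direction (even queue index): offload enqueue copies to
 *	// the DMA engine, then reclaim mbufs whose copies have completed.
 *	// Packets not accepted (nb_pkts - n_enq) stay owned by the caller.
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, 0, enq, nb_pkts,
 *						dma_id, vchan_id);
 *	n_done = rte_vhost_poll_enqueue_completed(vid, 0, done, MAX_PKT_BURST,
 *						dma_id, vchan_id);
 *	rte_pktmbuf_free_bulk(done, n_done);
 *
 *	// Guest TX direction (odd queue index): mbufs are only returned once
 *	// their DMA copies are done, so they can be transmitted right away.
 *	n_deq = rte_vhost_async_try_dequeue_burst(vid, 1, mp, deq,
 *						MAX_PKT_BURST, &nr_inflight,
 *						dma_id, vchan_id);
 */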