1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_dmadev.h> 15 #include <rte_vhost.h> 16 #include <rte_tcp.h> 17 #include <rte_udp.h> 18 #include <rte_sctp.h> 19 #include <rte_arp.h> 20 #include <rte_spinlock.h> 21 #include <rte_malloc.h> 22 #include <rte_vhost_async.h> 23 24 #include "iotlb.h" 25 #include "vhost.h" 26 27 #define MAX_BATCH_LEN 256 28 29 static __rte_always_inline uint16_t 30 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 31 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 32 uint16_t vchan_id, bool legacy_ol_flags); 33 34 /* DMA device copy operation tracking array. */ 35 struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX]; 36 37 static __rte_always_inline bool 38 rxvq_is_mergeable(struct virtio_net *dev) 39 { 40 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 41 } 42 43 static __rte_always_inline bool 44 virtio_net_is_inorder(struct virtio_net *dev) 45 { 46 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 47 } 48 49 static bool 50 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 51 { 52 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 53 } 54 55 /* 56 * This function must be called with virtqueue's access_lock taken. 57 */ 58 static inline void 59 vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq, 60 struct rte_mbuf **pkts, uint16_t count) 61 { 62 struct virtqueue_stats *stats = &vq->stats; 63 int i; 64 65 if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED)) 66 return; 67 68 for (i = 0; i < count; i++) { 69 struct rte_ether_addr *ea; 70 struct rte_mbuf *pkt = pkts[i]; 71 uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt); 72 73 stats->packets++; 74 stats->bytes += pkt_len; 75 76 if (pkt_len == 64) { 77 stats->size_bins[1]++; 78 } else if (pkt_len > 64 && pkt_len < 1024) { 79 uint32_t bin; 80 81 /* count zeros, and offset into correct bin */ 82 bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5; 83 stats->size_bins[bin]++; 84 } else { 85 if (pkt_len < 64) 86 stats->size_bins[0]++; 87 else if (pkt_len < 1519) 88 stats->size_bins[6]++; 89 else 90 stats->size_bins[7]++; 91 } 92 93 ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *); 94 if (rte_is_multicast_ether_addr(ea)) { 95 if (rte_is_broadcast_ether_addr(ea)) 96 stats->broadcast++; 97 else 98 stats->multicast++; 99 } 100 } 101 } 102 103 static __rte_always_inline int64_t 104 vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, 105 int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, 106 struct vhost_iov_iter *pkt) 107 { 108 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 109 uint16_t ring_mask = dma_info->ring_mask; 110 static bool vhost_async_dma_copy_log; 111 112 113 struct vhost_iovec *iov = pkt->iov; 114 int copy_idx = 0; 115 uint32_t nr_segs = pkt->nr_segs; 116 uint16_t i; 117 118 if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs) 119 return -1; 120 121 for (i = 0; i < nr_segs; i++) { 122 copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr, 123 (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); 124 /** 125 * Since all memory is pinned and DMA vChannel 126 * ring has enough space, failure should be a 127 * rare case. 
If failure happens, it means DMA 128 * device encounters serious errors; in this 129 * case, please stop async data-path and check 130 * what has happened to DMA device. 131 */ 132 if (unlikely(copy_idx < 0)) { 133 if (!vhost_async_dma_copy_log) { 134 VHOST_LOG_DATA(dev->ifname, ERR, 135 "DMA copy failed for channel %d:%u\n", 136 dma_id, vchan_id); 137 vhost_async_dma_copy_log = true; 138 } 139 return -1; 140 } 141 } 142 143 /** 144 * Only store packet completion flag address in the last copy's 145 * slot, and other slots are set to NULL. 146 */ 147 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx]; 148 149 return nr_segs; 150 } 151 152 static __rte_always_inline uint16_t 153 vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, 154 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, 155 struct vhost_iov_iter *pkts, uint16_t nr_pkts) 156 { 157 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 158 int64_t ret, nr_copies = 0; 159 uint16_t pkt_idx; 160 161 rte_spinlock_lock(&dma_info->dma_lock); 162 163 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { 164 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, 165 &pkts[pkt_idx]); 166 if (unlikely(ret < 0)) 167 break; 168 169 nr_copies += ret; 170 head_idx++; 171 if (head_idx >= vq->size) 172 head_idx -= vq->size; 173 } 174 175 if (likely(nr_copies > 0)) 176 rte_dma_submit(dma_id, vchan_id); 177 178 rte_spinlock_unlock(&dma_info->dma_lock); 179 180 return pkt_idx; 181 } 182 183 static __rte_always_inline uint16_t 184 vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id, 185 uint16_t max_pkts) 186 { 187 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 188 uint16_t ring_mask = dma_info->ring_mask; 189 uint16_t last_idx = 0; 190 uint16_t nr_copies; 191 uint16_t copy_idx; 192 uint16_t i; 193 bool has_error = false; 194 static bool vhost_async_dma_complete_log; 195 196 rte_spinlock_lock(&dma_info->dma_lock); 197 198 /** 199 * Print error log for debugging, if DMA reports error during 200 * DMA transfer. We do not handle error in vhost level. 201 */ 202 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error); 203 if (unlikely(!vhost_async_dma_complete_log && has_error)) { 204 VHOST_LOG_DATA(dev->ifname, ERR, 205 "DMA completion failure on channel %d:%u\n", 206 dma_id, vchan_id); 207 vhost_async_dma_complete_log = true; 208 } else if (nr_copies == 0) { 209 goto out; 210 } 211 212 copy_idx = last_idx - nr_copies + 1; 213 for (i = 0; i < nr_copies; i++) { 214 bool *flag; 215 216 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask]; 217 if (flag) { 218 /** 219 * Mark the packet flag as received. The flag 220 * could belong to another virtqueue but write 221 * is atomic. 
222 */ 223 *flag = true; 224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 225 } 226 copy_idx++; 227 } 228 229 out: 230 rte_spinlock_unlock(&dma_info->dma_lock); 231 return nr_copies; 232 } 233 234 static inline void 235 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 236 { 237 struct batch_copy_elem *elem = vq->batch_copy_elems; 238 uint16_t count = vq->batch_copy_nb_elems; 239 int i; 240 241 for (i = 0; i < count; i++) { 242 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 243 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 244 elem[i].len); 245 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 246 } 247 248 vq->batch_copy_nb_elems = 0; 249 } 250 251 static inline void 252 do_data_copy_dequeue(struct vhost_virtqueue *vq) 253 { 254 struct batch_copy_elem *elem = vq->batch_copy_elems; 255 uint16_t count = vq->batch_copy_nb_elems; 256 int i; 257 258 for (i = 0; i < count; i++) 259 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 260 261 vq->batch_copy_nb_elems = 0; 262 } 263 264 static __rte_always_inline void 265 do_flush_shadow_used_ring_split(struct virtio_net *dev, 266 struct vhost_virtqueue *vq, 267 uint16_t to, uint16_t from, uint16_t size) 268 { 269 rte_memcpy(&vq->used->ring[to], 270 &vq->shadow_used_split[from], 271 size * sizeof(struct vring_used_elem)); 272 vhost_log_cache_used_vring(dev, vq, 273 offsetof(struct vring_used, ring[to]), 274 size * sizeof(struct vring_used_elem)); 275 } 276 277 static __rte_always_inline void 278 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 279 { 280 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 281 282 if (used_idx + vq->shadow_used_idx <= vq->size) { 283 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 284 vq->shadow_used_idx); 285 } else { 286 uint16_t size; 287 288 /* update used ring interval [used_idx, vq->size] */ 289 size = vq->size - used_idx; 290 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 291 292 /* update the left half used ring interval [0, left_size] */ 293 do_flush_shadow_used_ring_split(dev, vq, 0, size, 294 vq->shadow_used_idx - size); 295 } 296 vq->last_used_idx += vq->shadow_used_idx; 297 298 vhost_log_cache_sync(dev, vq); 299 300 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 301 __ATOMIC_RELEASE); 302 vq->shadow_used_idx = 0; 303 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 304 sizeof(vq->used->idx)); 305 } 306 307 static __rte_always_inline void 308 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 309 uint16_t desc_idx, uint32_t len) 310 { 311 uint16_t i = vq->shadow_used_idx++; 312 313 vq->shadow_used_split[i].id = desc_idx; 314 vq->shadow_used_split[i].len = len; 315 } 316 317 static __rte_always_inline void 318 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 319 struct vhost_virtqueue *vq) 320 { 321 int i; 322 uint16_t used_idx = vq->last_used_idx; 323 uint16_t head_idx = vq->last_used_idx; 324 uint16_t head_flags = 0; 325 326 /* Split loop in two to save memory barriers */ 327 for (i = 0; i < vq->shadow_used_idx; i++) { 328 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 329 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 330 331 used_idx += vq->shadow_used_packed[i].count; 332 if (used_idx >= vq->size) 333 used_idx -= vq->size; 334 } 335 336 /* The ordering for storing desc flags needs to be enforced. 
*/ 337 rte_atomic_thread_fence(__ATOMIC_RELEASE); 338 339 for (i = 0; i < vq->shadow_used_idx; i++) { 340 uint16_t flags; 341 342 if (vq->shadow_used_packed[i].len) 343 flags = VRING_DESC_F_WRITE; 344 else 345 flags = 0; 346 347 if (vq->used_wrap_counter) { 348 flags |= VRING_DESC_F_USED; 349 flags |= VRING_DESC_F_AVAIL; 350 } else { 351 flags &= ~VRING_DESC_F_USED; 352 flags &= ~VRING_DESC_F_AVAIL; 353 } 354 355 if (i > 0) { 356 vq->desc_packed[vq->last_used_idx].flags = flags; 357 358 vhost_log_cache_used_vring(dev, vq, 359 vq->last_used_idx * 360 sizeof(struct vring_packed_desc), 361 sizeof(struct vring_packed_desc)); 362 } else { 363 head_idx = vq->last_used_idx; 364 head_flags = flags; 365 } 366 367 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 368 } 369 370 vq->desc_packed[head_idx].flags = head_flags; 371 372 vhost_log_cache_used_vring(dev, vq, 373 head_idx * 374 sizeof(struct vring_packed_desc), 375 sizeof(struct vring_packed_desc)); 376 377 vq->shadow_used_idx = 0; 378 vhost_log_cache_sync(dev, vq); 379 } 380 381 static __rte_always_inline void 382 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 383 struct vhost_virtqueue *vq) 384 { 385 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 386 387 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 388 /* desc flags is the synchronization point for virtio packed vring */ 389 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 390 used_elem->flags, __ATOMIC_RELEASE); 391 392 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 393 sizeof(struct vring_packed_desc), 394 sizeof(struct vring_packed_desc)); 395 vq->shadow_used_idx = 0; 396 vhost_log_cache_sync(dev, vq); 397 } 398 399 static __rte_always_inline void 400 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 401 struct vhost_virtqueue *vq, 402 uint64_t *lens, 403 uint16_t *ids) 404 { 405 uint16_t i; 406 uint16_t flags; 407 uint16_t last_used_idx; 408 struct vring_packed_desc *desc_base; 409 410 last_used_idx = vq->last_used_idx; 411 desc_base = &vq->desc_packed[last_used_idx]; 412 413 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 414 415 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 416 desc_base[i].id = ids[i]; 417 desc_base[i].len = lens[i]; 418 } 419 420 rte_atomic_thread_fence(__ATOMIC_RELEASE); 421 422 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 423 desc_base[i].flags = flags; 424 } 425 426 vhost_log_cache_used_vring(dev, vq, last_used_idx * 427 sizeof(struct vring_packed_desc), 428 sizeof(struct vring_packed_desc) * 429 PACKED_BATCH_SIZE); 430 vhost_log_cache_sync(dev, vq); 431 432 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 433 } 434 435 static __rte_always_inline void 436 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 437 uint16_t id) 438 { 439 vq->shadow_used_packed[0].id = id; 440 441 if (!vq->shadow_used_idx) { 442 vq->shadow_last_used_idx = vq->last_used_idx; 443 vq->shadow_used_packed[0].flags = 444 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 445 vq->shadow_used_packed[0].len = 0; 446 vq->shadow_used_packed[0].count = 1; 447 vq->shadow_used_idx++; 448 } 449 450 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 451 } 452 453 static __rte_always_inline void 454 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 455 struct vhost_virtqueue *vq, 456 uint16_t *ids) 457 { 458 uint16_t flags; 459 uint16_t i; 460 uint16_t begin; 461 462 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 463 464 if 
(!vq->shadow_used_idx) { 465 vq->shadow_last_used_idx = vq->last_used_idx; 466 vq->shadow_used_packed[0].id = ids[0]; 467 vq->shadow_used_packed[0].len = 0; 468 vq->shadow_used_packed[0].count = 1; 469 vq->shadow_used_packed[0].flags = flags; 470 vq->shadow_used_idx++; 471 begin = 1; 472 } else 473 begin = 0; 474 475 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 476 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 477 vq->desc_packed[vq->last_used_idx + i].len = 0; 478 } 479 480 rte_atomic_thread_fence(__ATOMIC_RELEASE); 481 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 482 vq->desc_packed[vq->last_used_idx + i].flags = flags; 483 484 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 485 sizeof(struct vring_packed_desc), 486 sizeof(struct vring_packed_desc) * 487 PACKED_BATCH_SIZE); 488 vhost_log_cache_sync(dev, vq); 489 490 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 491 } 492 493 static __rte_always_inline void 494 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 495 uint16_t buf_id, 496 uint16_t count) 497 { 498 uint16_t flags; 499 500 flags = vq->desc_packed[vq->last_used_idx].flags; 501 if (vq->used_wrap_counter) { 502 flags |= VRING_DESC_F_USED; 503 flags |= VRING_DESC_F_AVAIL; 504 } else { 505 flags &= ~VRING_DESC_F_USED; 506 flags &= ~VRING_DESC_F_AVAIL; 507 } 508 509 if (!vq->shadow_used_idx) { 510 vq->shadow_last_used_idx = vq->last_used_idx; 511 512 vq->shadow_used_packed[0].id = buf_id; 513 vq->shadow_used_packed[0].len = 0; 514 vq->shadow_used_packed[0].flags = flags; 515 vq->shadow_used_idx++; 516 } else { 517 vq->desc_packed[vq->last_used_idx].id = buf_id; 518 vq->desc_packed[vq->last_used_idx].len = 0; 519 vq->desc_packed[vq->last_used_idx].flags = flags; 520 } 521 522 vq_inc_last_used_packed(vq, count); 523 } 524 525 static __rte_always_inline void 526 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 527 uint16_t buf_id, 528 uint16_t count) 529 { 530 uint16_t flags; 531 532 vq->shadow_used_packed[0].id = buf_id; 533 534 flags = vq->desc_packed[vq->last_used_idx].flags; 535 if (vq->used_wrap_counter) { 536 flags |= VRING_DESC_F_USED; 537 flags |= VRING_DESC_F_AVAIL; 538 } else { 539 flags &= ~VRING_DESC_F_USED; 540 flags &= ~VRING_DESC_F_AVAIL; 541 } 542 543 if (!vq->shadow_used_idx) { 544 vq->shadow_last_used_idx = vq->last_used_idx; 545 vq->shadow_used_packed[0].len = 0; 546 vq->shadow_used_packed[0].flags = flags; 547 vq->shadow_used_idx++; 548 } 549 550 vq_inc_last_used_packed(vq, count); 551 } 552 553 static __rte_always_inline void 554 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 555 uint32_t *len, 556 uint16_t *id, 557 uint16_t *count, 558 uint16_t num_buffers) 559 { 560 uint16_t i; 561 562 for (i = 0; i < num_buffers; i++) { 563 /* enqueue shadow flush action aligned with batch num */ 564 if (!vq->shadow_used_idx) 565 vq->shadow_aligned_idx = vq->last_used_idx & 566 PACKED_BATCH_MASK; 567 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 568 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 569 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 570 vq->shadow_aligned_idx += count[i]; 571 vq->shadow_used_idx++; 572 } 573 } 574 575 static __rte_always_inline void 576 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 577 struct vhost_virtqueue *vq, 578 uint32_t *len, 579 uint16_t *id, 580 uint16_t *count, 581 uint16_t num_buffers) 582 { 583 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 584 585 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) 
{ 586 do_data_copy_enqueue(dev, vq); 587 vhost_flush_enqueue_shadow_packed(dev, vq); 588 } 589 } 590 591 /* avoid write operation when necessary, to lessen cache issues */ 592 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 593 if ((var) != (val)) \ 594 (var) = (val); \ 595 } while (0) 596 597 static __rte_always_inline void 598 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 599 { 600 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK; 601 602 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) 603 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM; 604 605 if (csum_l4) { 606 /* 607 * Pseudo-header checksum must be set as per Virtio spec. 608 * 609 * Note: We don't propagate rte_net_intel_cksum_prepare() 610 * errors, as it would have an impact on performance, and an 611 * error would mean the packet is dropped by the guest instead 612 * of being dropped here. 613 */ 614 rte_net_intel_cksum_prepare(m_buf); 615 616 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 617 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 618 619 switch (csum_l4) { 620 case RTE_MBUF_F_TX_TCP_CKSUM: 621 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 622 cksum)); 623 break; 624 case RTE_MBUF_F_TX_UDP_CKSUM: 625 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 626 dgram_cksum)); 627 break; 628 case RTE_MBUF_F_TX_SCTP_CKSUM: 629 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 630 cksum)); 631 break; 632 } 633 } else { 634 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 635 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 636 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 637 } 638 639 /* IP cksum verification cannot be bypassed, then calculate here */ 640 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 641 struct rte_ipv4_hdr *ipv4_hdr; 642 643 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 644 m_buf->l2_len); 645 ipv4_hdr->hdr_checksum = 0; 646 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 647 } 648 649 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 650 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 651 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 652 else 653 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 654 net_hdr->gso_size = m_buf->tso_segsz; 655 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 656 + m_buf->l4_len; 657 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 658 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 659 net_hdr->gso_size = m_buf->tso_segsz; 660 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 661 m_buf->l4_len; 662 } else { 663 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 664 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 665 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 666 } 667 } 668 669 static __rte_always_inline int 670 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 671 struct buf_vector *buf_vec, uint16_t *vec_idx, 672 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 673 { 674 uint16_t vec_id = *vec_idx; 675 676 while (desc_len) { 677 uint64_t desc_addr; 678 uint64_t desc_chunck_len = desc_len; 679 680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 681 return -1; 682 683 desc_addr = vhost_iova_to_vva(dev, vq, 684 desc_iova, 685 &desc_chunck_len, 686 perm); 687 if (unlikely(!desc_addr)) 688 return -1; 689 690 rte_prefetch0((void *)(uintptr_t)desc_addr); 691 692 buf_vec[vec_id].buf_iova = desc_iova; 693 buf_vec[vec_id].buf_addr = desc_addr; 694 buf_vec[vec_id].buf_len = desc_chunck_len; 695 696 desc_len -= desc_chunck_len; 697 desc_iova += desc_chunck_len; 698 vec_id++; 699 } 700 *vec_idx = vec_id; 701 702 return 0; 703 } 704 705 static 
__rte_always_inline int 706 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 707 uint32_t avail_idx, uint16_t *vec_idx, 708 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 709 uint32_t *desc_chain_len, uint8_t perm) 710 { 711 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 712 uint16_t vec_id = *vec_idx; 713 uint32_t len = 0; 714 uint64_t dlen; 715 uint32_t nr_descs = vq->size; 716 uint32_t cnt = 0; 717 struct vring_desc *descs = vq->desc; 718 struct vring_desc *idesc = NULL; 719 720 if (unlikely(idx >= vq->size)) 721 return -1; 722 723 *desc_chain_head = idx; 724 725 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 726 dlen = vq->desc[idx].len; 727 nr_descs = dlen / sizeof(struct vring_desc); 728 if (unlikely(nr_descs > vq->size)) 729 return -1; 730 731 descs = (struct vring_desc *)(uintptr_t) 732 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 733 &dlen, 734 VHOST_ACCESS_RO); 735 if (unlikely(!descs)) 736 return -1; 737 738 if (unlikely(dlen < vq->desc[idx].len)) { 739 /* 740 * The indirect desc table is not contiguous 741 * in process VA space, we have to copy it. 742 */ 743 idesc = vhost_alloc_copy_ind_table(dev, vq, 744 vq->desc[idx].addr, vq->desc[idx].len); 745 if (unlikely(!idesc)) 746 return -1; 747 748 descs = idesc; 749 } 750 751 idx = 0; 752 } 753 754 while (1) { 755 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 756 free_ind_table(idesc); 757 return -1; 758 } 759 760 dlen = descs[idx].len; 761 len += dlen; 762 763 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 764 descs[idx].addr, dlen, 765 perm))) { 766 free_ind_table(idesc); 767 return -1; 768 } 769 770 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 771 break; 772 773 idx = descs[idx].next; 774 } 775 776 *desc_chain_len = len; 777 *vec_idx = vec_id; 778 779 if (unlikely(!!idesc)) 780 free_ind_table(idesc); 781 782 return 0; 783 } 784 785 /* 786 * Returns -1 on fail, 0 on success 787 */ 788 static inline int 789 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 790 uint32_t size, struct buf_vector *buf_vec, 791 uint16_t *num_buffers, uint16_t avail_head, 792 uint16_t *nr_vec) 793 { 794 uint16_t cur_idx; 795 uint16_t vec_idx = 0; 796 uint16_t max_tries, tries = 0; 797 798 uint16_t head_idx = 0; 799 uint32_t len = 0; 800 801 *num_buffers = 0; 802 cur_idx = vq->last_avail_idx; 803 804 if (rxvq_is_mergeable(dev)) 805 max_tries = vq->size - 1; 806 else 807 max_tries = 1; 808 809 while (size > 0) { 810 if (unlikely(cur_idx == avail_head)) 811 return -1; 812 /* 813 * if we tried all available ring items, and still 814 * can't get enough buf, it means something abnormal 815 * happened. 
816 */ 817 if (unlikely(++tries > max_tries)) 818 return -1; 819 820 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 821 &vec_idx, buf_vec, 822 &head_idx, &len, 823 VHOST_ACCESS_RW) < 0)) 824 return -1; 825 len = RTE_MIN(len, size); 826 update_shadow_used_ring_split(vq, head_idx, len); 827 size -= len; 828 829 cur_idx++; 830 *num_buffers += 1; 831 } 832 833 *nr_vec = vec_idx; 834 835 return 0; 836 } 837 838 static __rte_always_inline int 839 fill_vec_buf_packed_indirect(struct virtio_net *dev, 840 struct vhost_virtqueue *vq, 841 struct vring_packed_desc *desc, uint16_t *vec_idx, 842 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 843 { 844 uint16_t i; 845 uint32_t nr_descs; 846 uint16_t vec_id = *vec_idx; 847 uint64_t dlen; 848 struct vring_packed_desc *descs, *idescs = NULL; 849 850 dlen = desc->len; 851 descs = (struct vring_packed_desc *)(uintptr_t) 852 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 853 if (unlikely(!descs)) 854 return -1; 855 856 if (unlikely(dlen < desc->len)) { 857 /* 858 * The indirect desc table is not contiguous 859 * in process VA space, we have to copy it. 860 */ 861 idescs = vhost_alloc_copy_ind_table(dev, 862 vq, desc->addr, desc->len); 863 if (unlikely(!idescs)) 864 return -1; 865 866 descs = idescs; 867 } 868 869 nr_descs = desc->len / sizeof(struct vring_packed_desc); 870 if (unlikely(nr_descs >= vq->size)) { 871 free_ind_table(idescs); 872 return -1; 873 } 874 875 for (i = 0; i < nr_descs; i++) { 876 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 877 free_ind_table(idescs); 878 return -1; 879 } 880 881 dlen = descs[i].len; 882 *len += dlen; 883 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 884 descs[i].addr, dlen, 885 perm))) 886 return -1; 887 } 888 *vec_idx = vec_id; 889 890 if (unlikely(!!idescs)) 891 free_ind_table(idescs); 892 893 return 0; 894 } 895 896 static __rte_always_inline int 897 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 898 uint16_t avail_idx, uint16_t *desc_count, 899 struct buf_vector *buf_vec, uint16_t *vec_idx, 900 uint16_t *buf_id, uint32_t *len, uint8_t perm) 901 { 902 bool wrap_counter = vq->avail_wrap_counter; 903 struct vring_packed_desc *descs = vq->desc_packed; 904 uint16_t vec_id = *vec_idx; 905 uint64_t dlen; 906 907 if (avail_idx < vq->last_avail_idx) 908 wrap_counter ^= 1; 909 910 /* 911 * Perform a load-acquire barrier in desc_is_avail to 912 * enforce the ordering between desc flags and desc 913 * content. 
914 */ 915 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 916 return -1; 917 918 *desc_count = 0; 919 *len = 0; 920 921 while (1) { 922 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 923 return -1; 924 925 if (unlikely(*desc_count >= vq->size)) 926 return -1; 927 928 *desc_count += 1; 929 *buf_id = descs[avail_idx].id; 930 931 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 932 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 933 &descs[avail_idx], 934 &vec_id, buf_vec, 935 len, perm) < 0)) 936 return -1; 937 } else { 938 dlen = descs[avail_idx].len; 939 *len += dlen; 940 941 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 942 descs[avail_idx].addr, 943 dlen, 944 perm))) 945 return -1; 946 } 947 948 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 949 break; 950 951 if (++avail_idx >= vq->size) { 952 avail_idx -= vq->size; 953 wrap_counter ^= 1; 954 } 955 } 956 957 *vec_idx = vec_id; 958 959 return 0; 960 } 961 962 static __rte_noinline void 963 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 964 struct buf_vector *buf_vec, 965 struct virtio_net_hdr_mrg_rxbuf *hdr) 966 { 967 uint64_t len; 968 uint64_t remain = dev->vhost_hlen; 969 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 970 uint64_t iova = buf_vec->buf_iova; 971 972 while (remain) { 973 len = RTE_MIN(remain, 974 buf_vec->buf_len); 975 dst = buf_vec->buf_addr; 976 rte_memcpy((void *)(uintptr_t)dst, 977 (void *)(uintptr_t)src, 978 len); 979 980 PRINT_PACKET(dev, (uintptr_t)dst, 981 (uint32_t)len, 0); 982 vhost_log_cache_write_iova(dev, vq, 983 iova, len); 984 985 remain -= len; 986 iova += len; 987 src += len; 988 buf_vec++; 989 } 990 } 991 992 static __rte_always_inline int 993 async_iter_initialize(struct virtio_net *dev, struct vhost_async *async) 994 { 995 struct vhost_iov_iter *iter; 996 997 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 998 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 999 return -1; 1000 } 1001 1002 iter = async->iov_iter + async->iter_idx; 1003 iter->iov = async->iovec + async->iovec_idx; 1004 iter->nr_segs = 0; 1005 1006 return 0; 1007 } 1008 1009 static __rte_always_inline int 1010 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1011 void *src, void *dst, size_t len) 1012 { 1013 struct vhost_iov_iter *iter; 1014 struct vhost_iovec *iovec; 1015 1016 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1017 static bool vhost_max_async_vec_log; 1018 1019 if (!vhost_max_async_vec_log) { 1020 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1021 vhost_max_async_vec_log = true; 1022 } 1023 1024 return -1; 1025 } 1026 1027 iter = async->iov_iter + async->iter_idx; 1028 iovec = async->iovec + async->iovec_idx; 1029 1030 iovec->src_addr = src; 1031 iovec->dst_addr = dst; 1032 iovec->len = len; 1033 1034 iter->nr_segs++; 1035 async->iovec_idx++; 1036 1037 return 0; 1038 } 1039 1040 static __rte_always_inline void 1041 async_iter_finalize(struct vhost_async *async) 1042 { 1043 async->iter_idx++; 1044 } 1045 1046 static __rte_always_inline void 1047 async_iter_cancel(struct vhost_async *async) 1048 { 1049 struct vhost_iov_iter *iter; 1050 1051 iter = async->iov_iter + async->iter_idx; 1052 async->iovec_idx -= iter->nr_segs; 1053 iter->nr_segs = 0; 1054 iter->iov = NULL; 1055 } 1056 1057 static __rte_always_inline void 1058 async_iter_reset(struct vhost_async *async) 1059 { 1060 async->iter_idx = 0; 1061 async->iovec_idx = 0; 1062 } 1063 1064 static __rte_always_inline int 1065 
async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1066 struct rte_mbuf *m, uint32_t mbuf_offset, 1067 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1068 { 1069 struct vhost_async *async = vq->async; 1070 uint64_t mapped_len; 1071 uint32_t buf_offset = 0; 1072 void *src, *dst; 1073 void *host_iova; 1074 1075 while (cpy_len) { 1076 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1077 buf_iova + buf_offset, cpy_len, &mapped_len); 1078 if (unlikely(!host_iova)) { 1079 VHOST_LOG_DATA(dev->ifname, ERR, 1080 "%s: failed to get host iova.\n", 1081 __func__); 1082 return -1; 1083 } 1084 1085 if (to_desc) { 1086 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1087 dst = host_iova; 1088 } else { 1089 src = host_iova; 1090 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1091 } 1092 1093 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1094 return -1; 1095 1096 cpy_len -= (uint32_t)mapped_len; 1097 mbuf_offset += (uint32_t)mapped_len; 1098 buf_offset += (uint32_t)mapped_len; 1099 } 1100 1101 return 0; 1102 } 1103 1104 static __rte_always_inline void 1105 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1106 struct rte_mbuf *m, uint32_t mbuf_offset, 1107 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1108 { 1109 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1110 1111 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1112 if (to_desc) { 1113 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1114 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1115 cpy_len); 1116 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1117 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1118 } else { 1119 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1120 (void *)((uintptr_t)(buf_addr)), 1121 cpy_len); 1122 } 1123 } else { 1124 if (to_desc) { 1125 batch_copy[vq->batch_copy_nb_elems].dst = 1126 (void *)((uintptr_t)(buf_addr)); 1127 batch_copy[vq->batch_copy_nb_elems].src = 1128 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1129 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1130 } else { 1131 batch_copy[vq->batch_copy_nb_elems].dst = 1132 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1133 batch_copy[vq->batch_copy_nb_elems].src = 1134 (void *)((uintptr_t)(buf_addr)); 1135 } 1136 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1137 vq->batch_copy_nb_elems++; 1138 } 1139 } 1140 1141 static __rte_always_inline int 1142 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1143 struct rte_mbuf *m, struct buf_vector *buf_vec, 1144 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1145 { 1146 uint32_t vec_idx = 0; 1147 uint32_t mbuf_offset, mbuf_avail; 1148 uint32_t buf_offset, buf_avail; 1149 uint64_t buf_addr, buf_iova, buf_len; 1150 uint32_t cpy_len; 1151 uint64_t hdr_addr; 1152 struct rte_mbuf *hdr_mbuf; 1153 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1154 struct vhost_async *async = vq->async; 1155 1156 if (unlikely(m == NULL)) 1157 return -1; 1158 1159 buf_addr = buf_vec[vec_idx].buf_addr; 1160 buf_iova = buf_vec[vec_idx].buf_iova; 1161 buf_len = buf_vec[vec_idx].buf_len; 1162 1163 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1164 return -1; 1165 1166 hdr_mbuf = m; 1167 hdr_addr = buf_addr; 1168 if (unlikely(buf_len < dev->vhost_hlen)) { 1169 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1170 hdr = &tmp_hdr; 1171 } else 1172 hdr = (struct virtio_net_hdr_mrg_rxbuf 
*)(uintptr_t)hdr_addr; 1173 1174 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers); 1175 1176 if (unlikely(buf_len < dev->vhost_hlen)) { 1177 buf_offset = dev->vhost_hlen - buf_len; 1178 vec_idx++; 1179 buf_addr = buf_vec[vec_idx].buf_addr; 1180 buf_iova = buf_vec[vec_idx].buf_iova; 1181 buf_len = buf_vec[vec_idx].buf_len; 1182 buf_avail = buf_len - buf_offset; 1183 } else { 1184 buf_offset = dev->vhost_hlen; 1185 buf_avail = buf_len - dev->vhost_hlen; 1186 } 1187 1188 mbuf_avail = rte_pktmbuf_data_len(m); 1189 mbuf_offset = 0; 1190 1191 if (is_async) { 1192 if (async_iter_initialize(dev, async)) 1193 return -1; 1194 } 1195 1196 while (mbuf_avail != 0 || m->next != NULL) { 1197 /* done with current buf, get the next one */ 1198 if (buf_avail == 0) { 1199 vec_idx++; 1200 if (unlikely(vec_idx >= nr_vec)) 1201 goto error; 1202 1203 buf_addr = buf_vec[vec_idx].buf_addr; 1204 buf_iova = buf_vec[vec_idx].buf_iova; 1205 buf_len = buf_vec[vec_idx].buf_len; 1206 1207 buf_offset = 0; 1208 buf_avail = buf_len; 1209 } 1210 1211 /* done with current mbuf, get the next one */ 1212 if (mbuf_avail == 0) { 1213 m = m->next; 1214 1215 mbuf_offset = 0; 1216 mbuf_avail = rte_pktmbuf_data_len(m); 1217 } 1218 1219 if (hdr_addr) { 1220 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1221 if (rxvq_is_mergeable(dev)) 1222 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1223 num_buffers); 1224 1225 if (unlikely(hdr == &tmp_hdr)) { 1226 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1227 } else { 1228 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1229 dev->vhost_hlen, 0); 1230 vhost_log_cache_write_iova(dev, vq, 1231 buf_vec[0].buf_iova, 1232 dev->vhost_hlen); 1233 } 1234 1235 hdr_addr = 0; 1236 } 1237 1238 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1239 1240 if (is_async) { 1241 if (async_fill_seg(dev, vq, m, mbuf_offset, 1242 buf_iova + buf_offset, cpy_len, true) < 0) 1243 goto error; 1244 } else { 1245 sync_fill_seg(dev, vq, m, mbuf_offset, 1246 buf_addr + buf_offset, 1247 buf_iova + buf_offset, cpy_len, true); 1248 } 1249 1250 mbuf_avail -= cpy_len; 1251 mbuf_offset += cpy_len; 1252 buf_avail -= cpy_len; 1253 buf_offset += cpy_len; 1254 } 1255 1256 if (is_async) 1257 async_iter_finalize(async); 1258 1259 return 0; 1260 error: 1261 if (is_async) 1262 async_iter_cancel(async); 1263 1264 return -1; 1265 } 1266 1267 static __rte_always_inline int 1268 vhost_enqueue_single_packed(struct virtio_net *dev, 1269 struct vhost_virtqueue *vq, 1270 struct rte_mbuf *pkt, 1271 struct buf_vector *buf_vec, 1272 uint16_t *nr_descs) 1273 { 1274 uint16_t nr_vec = 0; 1275 uint16_t avail_idx = vq->last_avail_idx; 1276 uint16_t max_tries, tries = 0; 1277 uint16_t buf_id = 0; 1278 uint32_t len = 0; 1279 uint16_t desc_count; 1280 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1281 uint16_t num_buffers = 0; 1282 uint32_t buffer_len[vq->size]; 1283 uint16_t buffer_buf_id[vq->size]; 1284 uint16_t buffer_desc_count[vq->size]; 1285 1286 if (rxvq_is_mergeable(dev)) 1287 max_tries = vq->size - 1; 1288 else 1289 max_tries = 1; 1290 1291 while (size > 0) { 1292 /* 1293 * if we tried all available ring items, and still 1294 * can't get enough buf, it means something abnormal 1295 * happened. 
1296 */ 1297 if (unlikely(++tries > max_tries)) 1298 return -1; 1299 1300 if (unlikely(fill_vec_buf_packed(dev, vq, 1301 avail_idx, &desc_count, 1302 buf_vec, &nr_vec, 1303 &buf_id, &len, 1304 VHOST_ACCESS_RW) < 0)) 1305 return -1; 1306 1307 len = RTE_MIN(len, size); 1308 size -= len; 1309 1310 buffer_len[num_buffers] = len; 1311 buffer_buf_id[num_buffers] = buf_id; 1312 buffer_desc_count[num_buffers] = desc_count; 1313 num_buffers += 1; 1314 1315 *nr_descs += desc_count; 1316 avail_idx += desc_count; 1317 if (avail_idx >= vq->size) 1318 avail_idx -= vq->size; 1319 } 1320 1321 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0) 1322 return -1; 1323 1324 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1325 buffer_desc_count, num_buffers); 1326 1327 return 0; 1328 } 1329 1330 static __rte_noinline uint32_t 1331 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1332 struct rte_mbuf **pkts, uint32_t count) 1333 { 1334 uint32_t pkt_idx = 0; 1335 uint16_t num_buffers; 1336 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1337 uint16_t avail_head; 1338 1339 /* 1340 * The ordering between avail index and 1341 * desc reads needs to be enforced. 1342 */ 1343 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1344 1345 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1346 1347 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1348 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1349 uint16_t nr_vec = 0; 1350 1351 if (unlikely(reserve_avail_buf_split(dev, vq, 1352 pkt_len, buf_vec, &num_buffers, 1353 avail_head, &nr_vec) < 0)) { 1354 VHOST_LOG_DATA(dev->ifname, DEBUG, 1355 "failed to get enough desc from vring\n"); 1356 vq->shadow_used_idx -= num_buffers; 1357 break; 1358 } 1359 1360 VHOST_LOG_DATA(dev->ifname, DEBUG, 1361 "current index %d | end index %d\n", 1362 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1363 1364 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1365 num_buffers, false) < 0) { 1366 vq->shadow_used_idx -= num_buffers; 1367 break; 1368 } 1369 1370 vq->last_avail_idx += num_buffers; 1371 } 1372 1373 do_data_copy_enqueue(dev, vq); 1374 1375 if (likely(vq->shadow_used_idx)) { 1376 flush_shadow_used_ring_split(dev, vq); 1377 vhost_vring_call_split(dev, vq); 1378 } 1379 1380 return pkt_idx; 1381 } 1382 1383 static __rte_always_inline int 1384 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1385 struct vhost_virtqueue *vq, 1386 struct rte_mbuf **pkts, 1387 uint64_t *desc_addrs, 1388 uint64_t *lens) 1389 { 1390 bool wrap_counter = vq->avail_wrap_counter; 1391 struct vring_packed_desc *descs = vq->desc_packed; 1392 uint16_t avail_idx = vq->last_avail_idx; 1393 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1394 uint16_t i; 1395 1396 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1397 return -1; 1398 1399 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1400 return -1; 1401 1402 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1403 if (unlikely(pkts[i]->next != NULL)) 1404 return -1; 1405 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1406 wrap_counter))) 1407 return -1; 1408 } 1409 1410 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1411 lens[i] = descs[avail_idx + i].len; 1412 1413 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1414 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1415 return -1; 1416 } 1417 1418 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1419 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 
1420 descs[avail_idx + i].addr, 1421 &lens[i], 1422 VHOST_ACCESS_RW); 1423 1424 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1425 if (unlikely(!desc_addrs[i])) 1426 return -1; 1427 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1428 return -1; 1429 } 1430 1431 return 0; 1432 } 1433 1434 static __rte_always_inline void 1435 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1436 struct vhost_virtqueue *vq, 1437 struct rte_mbuf **pkts, 1438 uint64_t *desc_addrs, 1439 uint64_t *lens) 1440 { 1441 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1442 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1443 struct vring_packed_desc *descs = vq->desc_packed; 1444 uint16_t avail_idx = vq->last_avail_idx; 1445 uint16_t ids[PACKED_BATCH_SIZE]; 1446 uint16_t i; 1447 1448 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1449 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1450 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1451 (uintptr_t)desc_addrs[i]; 1452 lens[i] = pkts[i]->pkt_len + 1453 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1454 } 1455 1456 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1457 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1458 1459 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1460 1461 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1462 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1463 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1464 pkts[i]->pkt_len); 1465 } 1466 1467 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1468 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1469 lens[i]); 1470 1471 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1472 ids[i] = descs[avail_idx + i].id; 1473 1474 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1475 } 1476 1477 static __rte_always_inline int 1478 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1479 struct vhost_virtqueue *vq, 1480 struct rte_mbuf **pkts) 1481 { 1482 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1483 uint64_t lens[PACKED_BATCH_SIZE]; 1484 1485 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1486 return -1; 1487 1488 if (vq->shadow_used_idx) { 1489 do_data_copy_enqueue(dev, vq); 1490 vhost_flush_enqueue_shadow_packed(dev, vq); 1491 } 1492 1493 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1494 1495 return 0; 1496 } 1497 1498 static __rte_always_inline int16_t 1499 virtio_dev_rx_single_packed(struct virtio_net *dev, 1500 struct vhost_virtqueue *vq, 1501 struct rte_mbuf *pkt) 1502 { 1503 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1504 uint16_t nr_descs = 0; 1505 1506 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1507 &nr_descs) < 0)) { 1508 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1509 return -1; 1510 } 1511 1512 VHOST_LOG_DATA(dev->ifname, DEBUG, 1513 "current index %d | end index %d\n", 1514 vq->last_avail_idx, vq->last_avail_idx + nr_descs); 1515 1516 vq_inc_last_avail_packed(vq, nr_descs); 1517 1518 return 0; 1519 } 1520 1521 static __rte_noinline uint32_t 1522 virtio_dev_rx_packed(struct virtio_net *dev, 1523 struct vhost_virtqueue *__rte_restrict vq, 1524 struct rte_mbuf **__rte_restrict pkts, 1525 uint32_t count) 1526 { 1527 uint32_t pkt_idx = 0; 1528 1529 do { 1530 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1531 1532 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1533 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1534 &pkts[pkt_idx])) { 1535 pkt_idx += PACKED_BATCH_SIZE; 1536 continue; 1537 } 1538 } 1539 1540 
if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1541 break; 1542 pkt_idx++; 1543 1544 } while (pkt_idx < count); 1545 1546 if (vq->shadow_used_idx) { 1547 do_data_copy_enqueue(dev, vq); 1548 vhost_flush_enqueue_shadow_packed(dev, vq); 1549 } 1550 1551 if (pkt_idx) 1552 vhost_vring_call_packed(dev, vq); 1553 1554 return pkt_idx; 1555 } 1556 1557 static __rte_always_inline uint32_t 1558 virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, 1559 struct rte_mbuf **pkts, uint32_t count) 1560 { 1561 uint32_t nb_tx = 0; 1562 1563 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 1564 rte_spinlock_lock(&vq->access_lock); 1565 1566 if (unlikely(!vq->enabled)) 1567 goto out_access_unlock; 1568 1569 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1570 vhost_user_iotlb_rd_lock(vq); 1571 1572 if (unlikely(!vq->access_ok)) 1573 if (unlikely(vring_translate(dev, vq) < 0)) 1574 goto out; 1575 1576 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1577 if (count == 0) 1578 goto out; 1579 1580 if (vq_is_packed(dev)) 1581 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1582 else 1583 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1584 1585 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1586 1587 out: 1588 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1589 vhost_user_iotlb_rd_unlock(vq); 1590 1591 out_access_unlock: 1592 rte_spinlock_unlock(&vq->access_lock); 1593 1594 return nb_tx; 1595 } 1596 1597 uint16_t 1598 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1599 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1600 { 1601 struct virtio_net *dev = get_device(vid); 1602 1603 if (!dev) 1604 return 0; 1605 1606 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1607 VHOST_LOG_DATA(dev->ifname, ERR, 1608 "%s: built-in vhost net backend is disabled.\n", 1609 __func__); 1610 return 0; 1611 } 1612 1613 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1614 VHOST_LOG_DATA(dev->ifname, ERR, 1615 "%s: invalid virtqueue idx %d.\n", 1616 __func__, queue_id); 1617 return 0; 1618 } 1619 1620 return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); 1621 } 1622 1623 static __rte_always_inline uint16_t 1624 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1625 { 1626 struct vhost_async *async = vq->async; 1627 1628 if (async->pkts_idx >= async->pkts_inflight_n) 1629 return async->pkts_idx - async->pkts_inflight_n; 1630 else 1631 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1632 } 1633 1634 static __rte_always_inline void 1635 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1636 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1637 { 1638 size_t elem_size = sizeof(struct vring_used_elem); 1639 1640 if (d_idx + count <= ring_size) { 1641 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1642 } else { 1643 uint16_t size = ring_size - d_idx; 1644 1645 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1646 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1647 } 1648 } 1649 1650 static __rte_always_inline void 1651 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1652 struct vring_used_elem_packed *d_ring, 1653 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1654 { 1655 size_t elem_size = sizeof(struct vring_used_elem_packed); 1656 1657 if (d_idx + count <= ring_size) { 1658 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1659 } else { 1660 
uint16_t size = ring_size - d_idx; 1661 1662 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1663 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1664 } 1665 } 1666 1667 static __rte_noinline uint32_t 1668 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1669 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1670 { 1671 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1672 uint32_t pkt_idx = 0; 1673 uint16_t num_buffers; 1674 uint16_t avail_head; 1675 1676 struct vhost_async *async = vq->async; 1677 struct async_inflight_info *pkts_info = async->pkts_info; 1678 uint32_t pkt_err = 0; 1679 uint16_t n_xfer; 1680 uint16_t slot_idx = 0; 1681 1682 /* 1683 * The ordering between avail index and desc reads need to be enforced. 1684 */ 1685 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1686 1687 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1688 1689 async_iter_reset(async); 1690 1691 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1692 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1693 uint16_t nr_vec = 0; 1694 1695 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1696 &num_buffers, avail_head, &nr_vec) < 0)) { 1697 VHOST_LOG_DATA(dev->ifname, DEBUG, 1698 "failed to get enough desc from vring\n"); 1699 vq->shadow_used_idx -= num_buffers; 1700 break; 1701 } 1702 1703 VHOST_LOG_DATA(dev->ifname, DEBUG, 1704 "current index %d | end index %d\n", 1705 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1706 1707 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1708 vq->shadow_used_idx -= num_buffers; 1709 break; 1710 } 1711 1712 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1713 pkts_info[slot_idx].descs = num_buffers; 1714 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1715 1716 vq->last_avail_idx += num_buffers; 1717 } 1718 1719 if (unlikely(pkt_idx == 0)) 1720 return 0; 1721 1722 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1723 async->iov_iter, pkt_idx); 1724 1725 pkt_err = pkt_idx - n_xfer; 1726 if (unlikely(pkt_err)) { 1727 uint16_t num_descs = 0; 1728 1729 VHOST_LOG_DATA(dev->ifname, DEBUG, 1730 "%s: failed to transfer %u packets for queue %u.\n", 1731 __func__, pkt_err, vq->index); 1732 1733 /* update number of completed packets */ 1734 pkt_idx = n_xfer; 1735 1736 /* calculate the sum of descriptors to revert */ 1737 while (pkt_err-- > 0) { 1738 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1739 slot_idx--; 1740 } 1741 1742 /* recover shadow used ring and available ring */ 1743 vq->shadow_used_idx -= num_descs; 1744 vq->last_avail_idx -= num_descs; 1745 } 1746 1747 /* keep used descriptors */ 1748 if (likely(vq->shadow_used_idx)) { 1749 uint16_t to = async->desc_idx_split & (vq->size - 1); 1750 1751 store_dma_desc_info_split(vq->shadow_used_split, 1752 async->descs_split, vq->size, 0, to, 1753 vq->shadow_used_idx); 1754 1755 async->desc_idx_split += vq->shadow_used_idx; 1756 1757 async->pkts_idx += pkt_idx; 1758 if (async->pkts_idx >= vq->size) 1759 async->pkts_idx -= vq->size; 1760 1761 async->pkts_inflight_n += pkt_idx; 1762 vq->shadow_used_idx = 0; 1763 } 1764 1765 return pkt_idx; 1766 } 1767 1768 1769 static __rte_always_inline int 1770 vhost_enqueue_async_packed(struct virtio_net *dev, 1771 struct vhost_virtqueue *vq, 1772 struct rte_mbuf *pkt, 1773 struct buf_vector *buf_vec, 1774 uint16_t *nr_descs, 1775 uint16_t *nr_buffers) 1776 { 1777 
uint16_t nr_vec = 0; 1778 uint16_t avail_idx = vq->last_avail_idx; 1779 uint16_t max_tries, tries = 0; 1780 uint16_t buf_id = 0; 1781 uint32_t len = 0; 1782 uint16_t desc_count = 0; 1783 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1784 uint32_t buffer_len[vq->size]; 1785 uint16_t buffer_buf_id[vq->size]; 1786 uint16_t buffer_desc_count[vq->size]; 1787 1788 if (rxvq_is_mergeable(dev)) 1789 max_tries = vq->size - 1; 1790 else 1791 max_tries = 1; 1792 1793 while (size > 0) { 1794 /* 1795 * if we tried all available ring items, and still 1796 * can't get enough buf, it means something abnormal 1797 * happened. 1798 */ 1799 if (unlikely(++tries > max_tries)) 1800 return -1; 1801 1802 if (unlikely(fill_vec_buf_packed(dev, vq, 1803 avail_idx, &desc_count, 1804 buf_vec, &nr_vec, 1805 &buf_id, &len, 1806 VHOST_ACCESS_RW) < 0)) 1807 return -1; 1808 1809 len = RTE_MIN(len, size); 1810 size -= len; 1811 1812 buffer_len[*nr_buffers] = len; 1813 buffer_buf_id[*nr_buffers] = buf_id; 1814 buffer_desc_count[*nr_buffers] = desc_count; 1815 *nr_buffers += 1; 1816 *nr_descs += desc_count; 1817 avail_idx += desc_count; 1818 if (avail_idx >= vq->size) 1819 avail_idx -= vq->size; 1820 } 1821 1822 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1823 return -1; 1824 1825 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1826 1827 return 0; 1828 } 1829 1830 static __rte_always_inline int16_t 1831 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1832 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1833 { 1834 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1835 1836 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1837 nr_descs, nr_buffers) < 0)) { 1838 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1839 return -1; 1840 } 1841 1842 VHOST_LOG_DATA(dev->ifname, DEBUG, 1843 "current index %d | end index %d\n", 1844 vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1845 1846 return 0; 1847 } 1848 1849 static __rte_always_inline void 1850 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 1851 uint32_t nr_err, uint32_t *pkt_idx) 1852 { 1853 uint16_t descs_err = 0; 1854 uint16_t buffers_err = 0; 1855 struct async_inflight_info *pkts_info = vq->async->pkts_info; 1856 1857 *pkt_idx -= nr_err; 1858 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1859 while (nr_err-- > 0) { 1860 descs_err += pkts_info[slot_idx % vq->size].descs; 1861 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1862 slot_idx--; 1863 } 1864 1865 if (vq->last_avail_idx >= descs_err) { 1866 vq->last_avail_idx -= descs_err; 1867 } else { 1868 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1869 vq->avail_wrap_counter ^= 1; 1870 } 1871 1872 vq->shadow_used_idx -= buffers_err; 1873 } 1874 1875 static __rte_noinline uint32_t 1876 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1877 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1878 { 1879 uint32_t pkt_idx = 0; 1880 uint32_t remained = count; 1881 uint16_t n_xfer; 1882 uint16_t num_buffers; 1883 uint16_t num_descs; 1884 1885 struct vhost_async *async = vq->async; 1886 struct async_inflight_info *pkts_info = async->pkts_info; 1887 uint32_t pkt_err = 0; 1888 uint16_t slot_idx = 0; 1889 1890 do { 1891 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1892 1893 num_buffers = 0; 1894 num_descs = 0; 1895 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 1896 &num_descs, &num_buffers) < 0)) 1897 break; 1898 1899 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 1900 1901 pkts_info[slot_idx].descs = num_descs; 1902 pkts_info[slot_idx].nr_buffers = num_buffers; 1903 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1904 1905 pkt_idx++; 1906 remained--; 1907 vq_inc_last_avail_packed(vq, num_descs); 1908 } while (pkt_idx < count); 1909 1910 if (unlikely(pkt_idx == 0)) 1911 return 0; 1912 1913 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1914 async->iov_iter, pkt_idx); 1915 1916 async_iter_reset(async); 1917 1918 pkt_err = pkt_idx - n_xfer; 1919 if (unlikely(pkt_err)) { 1920 VHOST_LOG_DATA(dev->ifname, DEBUG, 1921 "%s: failed to transfer %u packets for queue %u.\n", 1922 __func__, pkt_err, vq->index); 1923 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 1924 } 1925 1926 if (likely(vq->shadow_used_idx)) { 1927 /* keep used descriptors. */ 1928 store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed, 1929 vq->size, 0, async->buffer_idx_packed, 1930 vq->shadow_used_idx); 1931 1932 async->buffer_idx_packed += vq->shadow_used_idx; 1933 if (async->buffer_idx_packed >= vq->size) 1934 async->buffer_idx_packed -= vq->size; 1935 1936 async->pkts_idx += pkt_idx; 1937 if (async->pkts_idx >= vq->size) 1938 async->pkts_idx -= vq->size; 1939 1940 vq->shadow_used_idx = 0; 1941 async->pkts_inflight_n += pkt_idx; 1942 } 1943 1944 return pkt_idx; 1945 } 1946 1947 static __rte_always_inline void 1948 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 1949 { 1950 struct vhost_async *async = vq->async; 1951 uint16_t nr_left = n_descs; 1952 uint16_t nr_copy; 1953 uint16_t to, from; 1954 1955 do { 1956 from = async->last_desc_idx_split & (vq->size - 1); 1957 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 1958 to = vq->last_used_idx & (vq->size - 1); 1959 1960 if (to + nr_copy <= vq->size) { 1961 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1962 nr_copy * sizeof(struct vring_used_elem)); 1963 } else { 1964 uint16_t size = vq->size - to; 1965 1966 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1967 size * sizeof(struct vring_used_elem)); 1968 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 1969 (nr_copy - size) * sizeof(struct vring_used_elem)); 1970 } 1971 1972 async->last_desc_idx_split += nr_copy; 1973 vq->last_used_idx += nr_copy; 1974 nr_left -= nr_copy; 1975 } while (nr_left > 0); 1976 } 1977 1978 static __rte_always_inline void 1979 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 1980 uint16_t n_buffers) 1981 { 1982 struct vhost_async *async = vq->async; 1983 uint16_t from = async->last_buffer_idx_packed; 1984 uint16_t used_idx = vq->last_used_idx; 1985 uint16_t head_idx = vq->last_used_idx; 1986 uint16_t head_flags = 0; 1987 uint16_t i; 1988 1989 /* Split loop in two to save memory barriers */ 1990 for (i = 0; i < n_buffers; i++) { 1991 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 1992 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 1993 1994 used_idx += async->buffers_packed[from].count; 1995 if (used_idx >= vq->size) 1996 used_idx -= vq->size; 1997 1998 from++; 1999 if (from >= vq->size) 2000 from = 0; 2001 } 2002 2003 /* The ordering for storing desc flags needs to be enforced. */ 2004 rte_atomic_thread_fence(__ATOMIC_RELEASE); 2005 2006 from = async->last_buffer_idx_packed; 2007 2008 for (i = 0; i < n_buffers; i++) { 2009 uint16_t flags; 2010 2011 if (async->buffers_packed[from].len) 2012 flags = VRING_DESC_F_WRITE; 2013 else 2014 flags = 0; 2015 2016 if (vq->used_wrap_counter) { 2017 flags |= VRING_DESC_F_USED; 2018 flags |= VRING_DESC_F_AVAIL; 2019 } else { 2020 flags &= ~VRING_DESC_F_USED; 2021 flags &= ~VRING_DESC_F_AVAIL; 2022 } 2023 2024 if (i > 0) { 2025 vq->desc_packed[vq->last_used_idx].flags = flags; 2026 } else { 2027 head_idx = vq->last_used_idx; 2028 head_flags = flags; 2029 } 2030 2031 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2032 2033 from++; 2034 if (from == vq->size) 2035 from = 0; 2036 } 2037 2038 vq->desc_packed[head_idx].flags = head_flags; 2039 async->last_buffer_idx_packed = from; 2040 } 2041 2042 static __rte_always_inline uint16_t 2043 vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2044 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) 2045 { 2046 struct vhost_async *async = vq->async; 2047 struct async_inflight_info *pkts_info = async->pkts_info; 2048 uint16_t nr_cpl_pkts = 0; 2049 uint16_t n_descs = 0, n_buffers = 0; 2050 uint16_t start_idx, from, i; 2051 2052 /* Check completed copies for the given DMA vChannel */ 2053 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2054 2055 start_idx = async_get_first_inflight_pkt_idx(vq); 2056 /** 2057 * Calculate the number of copy completed packets. 2058 * Note that there may be completed packets even if 2059 * no copies are reported done by the given DMA vChannel, 2060 * as it's possible that a virtqueue uses multiple DMA 2061 * vChannels. 
2062 */ 2063 from = start_idx; 2064 while (vq->async->pkts_cmpl_flag[from] && count--) { 2065 vq->async->pkts_cmpl_flag[from] = false; 2066 from++; 2067 if (from >= vq->size) 2068 from -= vq->size; 2069 nr_cpl_pkts++; 2070 } 2071 2072 if (nr_cpl_pkts == 0) 2073 return 0; 2074 2075 for (i = 0; i < nr_cpl_pkts; i++) { 2076 from = (start_idx + i) % vq->size; 2077 /* Only used with packed ring */ 2078 n_buffers += pkts_info[from].nr_buffers; 2079 /* Only used with split ring */ 2080 n_descs += pkts_info[from].descs; 2081 pkts[i] = pkts_info[from].mbuf; 2082 } 2083 2084 async->pkts_inflight_n -= nr_cpl_pkts; 2085 2086 if (likely(vq->enabled && vq->access_ok)) { 2087 if (vq_is_packed(dev)) { 2088 write_back_completed_descs_packed(vq, n_buffers); 2089 vhost_vring_call_packed(dev, vq); 2090 } else { 2091 write_back_completed_descs_split(vq, n_descs); 2092 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE); 2093 vhost_vring_call_split(dev, vq); 2094 } 2095 } else { 2096 if (vq_is_packed(dev)) { 2097 async->last_buffer_idx_packed += n_buffers; 2098 if (async->last_buffer_idx_packed >= vq->size) 2099 async->last_buffer_idx_packed -= vq->size; 2100 } else { 2101 async->last_desc_idx_split += n_descs; 2102 } 2103 } 2104 2105 return nr_cpl_pkts; 2106 } 2107 2108 uint16_t 2109 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2110 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2111 uint16_t vchan_id) 2112 { 2113 struct virtio_net *dev = get_device(vid); 2114 struct vhost_virtqueue *vq; 2115 uint16_t n_pkts_cpl = 0; 2116 2117 if (unlikely(!dev)) 2118 return 0; 2119 2120 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2121 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2122 VHOST_LOG_DATA(dev->ifname, ERR, 2123 "%s: invalid virtqueue idx %d.\n", 2124 __func__, queue_id); 2125 return 0; 2126 } 2127 2128 if (unlikely(!dma_copy_track[dma_id].vchans || 2129 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2130 VHOST_LOG_DATA(dev->ifname, ERR, 2131 "%s: invalid channel %d:%u.\n", 2132 __func__, dma_id, vchan_id); 2133 return 0; 2134 } 2135 2136 vq = dev->virtqueue[queue_id]; 2137 2138 if (!rte_spinlock_trylock(&vq->access_lock)) { 2139 VHOST_LOG_DATA(dev->ifname, DEBUG, 2140 "%s: virtqueue %u is busy.\n", 2141 __func__, queue_id); 2142 return 0; 2143 } 2144 2145 if (unlikely(!vq->async)) { 2146 VHOST_LOG_DATA(dev->ifname, ERR, 2147 "%s: async not registered for virtqueue %d.\n", 2148 __func__, queue_id); 2149 goto out; 2150 } 2151 2152 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); 2153 2154 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2155 vq->stats.inflight_completed += n_pkts_cpl; 2156 2157 out: 2158 rte_spinlock_unlock(&vq->access_lock); 2159 2160 return n_pkts_cpl; 2161 } 2162 2163 uint16_t 2164 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2165 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2166 uint16_t vchan_id) 2167 { 2168 struct virtio_net *dev = get_device(vid); 2169 struct vhost_virtqueue *vq; 2170 uint16_t n_pkts_cpl = 0; 2171 2172 if (!dev) 2173 return 0; 2174 2175 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2176 if (unlikely(queue_id >= dev->nr_vring)) { 2177 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 2178 __func__, queue_id); 2179 return 0; 2180 } 2181 2182 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2183 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2184 __func__, dma_id); 2185 
return 0; 2186 } 2187 2188 vq = dev->virtqueue[queue_id]; 2189 2190 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { 2191 VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access lock taken.\n", 2192 __func__); 2193 return -1; 2194 } 2195 2196 if (unlikely(!vq->async)) { 2197 VHOST_LOG_DATA(dev->ifname, ERR, 2198 "%s: async not registered for virtqueue %d.\n", 2199 __func__, queue_id); 2200 return 0; 2201 } 2202 2203 if (unlikely(!dma_copy_track[dma_id].vchans || 2204 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2205 VHOST_LOG_DATA(dev->ifname, ERR, 2206 "%s: invalid channel %d:%u.\n", 2207 __func__, dma_id, vchan_id); 2208 return 0; 2209 } 2210 2211 if ((queue_id & 1) == 0) 2212 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2213 dma_id, vchan_id); 2214 else 2215 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2216 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2217 2218 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2219 vq->stats.inflight_completed += n_pkts_cpl; 2220 2221 return n_pkts_cpl; 2222 } 2223 2224 uint16_t 2225 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2226 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2227 { 2228 struct virtio_net *dev = get_device(vid); 2229 struct vhost_virtqueue *vq; 2230 uint16_t n_pkts_cpl = 0; 2231 2232 if (!dev) 2233 return 0; 2234 2235 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2236 if (unlikely(queue_id >= dev->nr_vring)) { 2237 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n", 2238 __func__, queue_id); 2239 return 0; 2240 } 2241 2242 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2243 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2244 __func__, dma_id); 2245 return 0; 2246 } 2247 2248 vq = dev->virtqueue[queue_id]; 2249 2250 if (!rte_spinlock_trylock(&vq->access_lock)) { 2251 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", 2252 __func__, queue_id); 2253 return 0; 2254 } 2255 2256 if (unlikely(!vq->async)) { 2257 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n", 2258 __func__, queue_id); 2259 goto out_access_unlock; 2260 } 2261 2262 if (unlikely(!dma_copy_track[dma_id].vchans || 2263 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2264 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 2265 __func__, dma_id, vchan_id); 2266 goto out_access_unlock; 2267 } 2268 2269 if ((queue_id & 1) == 0) 2270 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2271 dma_id, vchan_id); 2272 else 2273 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2274 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2275 2276 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2277 vq->stats.inflight_completed += n_pkts_cpl; 2278 2279 out_access_unlock: 2280 rte_spinlock_unlock(&vq->access_lock); 2281 2282 return n_pkts_cpl; 2283 } 2284 2285 static __rte_always_inline uint32_t 2286 virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, 2287 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2288 { 2289 uint32_t nb_tx = 0; 2290 2291 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2292 2293 if (unlikely(!dma_copy_track[dma_id].vchans || 2294 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2295 VHOST_LOG_DATA(dev->ifname, ERR, 2296 "%s: invalid channel %d:%u.\n", 2297 __func__, dma_id, vchan_id); 2298 return 0; 2299 } 
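	/*
	 * Take the virtqueue access_lock first, then the IOTLB read lock when
	 * VIRTIO_F_IOMMU_PLATFORM is negotiated, and translate the vring
	 * addresses on first access before building and submitting DMA copies.
	 */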
2300 2301 rte_spinlock_lock(&vq->access_lock); 2302 2303 if (unlikely(!vq->enabled || !vq->async)) 2304 goto out_access_unlock; 2305 2306 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2307 vhost_user_iotlb_rd_lock(vq); 2308 2309 if (unlikely(!vq->access_ok)) 2310 if (unlikely(vring_translate(dev, vq) < 0)) 2311 goto out; 2312 2313 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2314 if (count == 0) 2315 goto out; 2316 2317 if (vq_is_packed(dev)) 2318 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, 2319 dma_id, vchan_id); 2320 else 2321 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, 2322 dma_id, vchan_id); 2323 2324 vq->stats.inflight_submitted += nb_tx; 2325 2326 out: 2327 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2328 vhost_user_iotlb_rd_unlock(vq); 2329 2330 out_access_unlock: 2331 rte_spinlock_unlock(&vq->access_lock); 2332 2333 return nb_tx; 2334 } 2335 2336 uint16_t 2337 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2338 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2339 uint16_t vchan_id) 2340 { 2341 struct virtio_net *dev = get_device(vid); 2342 2343 if (!dev) 2344 return 0; 2345 2346 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2347 VHOST_LOG_DATA(dev->ifname, ERR, 2348 "%s: built-in vhost net backend is disabled.\n", 2349 __func__); 2350 return 0; 2351 } 2352 2353 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2354 VHOST_LOG_DATA(dev->ifname, ERR, 2355 "%s: invalid virtqueue idx %d.\n", 2356 __func__, queue_id); 2357 return 0; 2358 } 2359 2360 return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, 2361 dma_id, vchan_id); 2362 } 2363 2364 static inline bool 2365 virtio_net_with_host_offload(struct virtio_net *dev) 2366 { 2367 if (dev->features & 2368 ((1ULL << VIRTIO_NET_F_CSUM) | 2369 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2370 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2371 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2372 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2373 return true; 2374 2375 return false; 2376 } 2377 2378 static int 2379 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2380 { 2381 struct rte_ipv4_hdr *ipv4_hdr; 2382 struct rte_ipv6_hdr *ipv6_hdr; 2383 struct rte_ether_hdr *eth_hdr; 2384 uint16_t ethertype; 2385 uint16_t data_len = rte_pktmbuf_data_len(m); 2386 2387 if (data_len < sizeof(struct rte_ether_hdr)) 2388 return -EINVAL; 2389 2390 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2391 2392 m->l2_len = sizeof(struct rte_ether_hdr); 2393 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2394 2395 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2396 if (data_len < sizeof(struct rte_ether_hdr) + 2397 sizeof(struct rte_vlan_hdr)) 2398 goto error; 2399 2400 struct rte_vlan_hdr *vlan_hdr = 2401 (struct rte_vlan_hdr *)(eth_hdr + 1); 2402 2403 m->l2_len += sizeof(struct rte_vlan_hdr); 2404 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2405 } 2406 2407 switch (ethertype) { 2408 case RTE_ETHER_TYPE_IPV4: 2409 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2410 goto error; 2411 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2412 m->l2_len); 2413 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2414 if (data_len < m->l2_len + m->l3_len) 2415 goto error; 2416 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2417 *l4_proto = ipv4_hdr->next_proto_id; 2418 break; 2419 case RTE_ETHER_TYPE_IPV6: 2420 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2421 goto error; 2422 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2423 m->l2_len); 
2424 m->l3_len = sizeof(struct rte_ipv6_hdr); 2425 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2426 *l4_proto = ipv6_hdr->proto; 2427 break; 2428 default: 2429 /* a valid L3 header is needed for further L4 parsing */ 2430 goto error; 2431 } 2432 2433 /* both CSUM and GSO need a valid L4 header */ 2434 switch (*l4_proto) { 2435 case IPPROTO_TCP: 2436 if (data_len < m->l2_len + m->l3_len + 2437 sizeof(struct rte_tcp_hdr)) 2438 goto error; 2439 break; 2440 case IPPROTO_UDP: 2441 if (data_len < m->l2_len + m->l3_len + 2442 sizeof(struct rte_udp_hdr)) 2443 goto error; 2444 break; 2445 case IPPROTO_SCTP: 2446 if (data_len < m->l2_len + m->l3_len + 2447 sizeof(struct rte_sctp_hdr)) 2448 goto error; 2449 break; 2450 default: 2451 goto error; 2452 } 2453 2454 return 0; 2455 2456 error: 2457 m->l2_len = 0; 2458 m->l3_len = 0; 2459 m->ol_flags = 0; 2460 return -EINVAL; 2461 } 2462 2463 static __rte_always_inline void 2464 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2465 struct rte_mbuf *m) 2466 { 2467 uint8_t l4_proto = 0; 2468 struct rte_tcp_hdr *tcp_hdr = NULL; 2469 uint16_t tcp_len; 2470 uint16_t data_len = rte_pktmbuf_data_len(m); 2471 2472 if (parse_headers(m, &l4_proto) < 0) 2473 return; 2474 2475 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2476 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2477 switch (hdr->csum_offset) { 2478 case (offsetof(struct rte_tcp_hdr, cksum)): 2479 if (l4_proto != IPPROTO_TCP) 2480 goto error; 2481 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2482 break; 2483 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2484 if (l4_proto != IPPROTO_UDP) 2485 goto error; 2486 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2487 break; 2488 case (offsetof(struct rte_sctp_hdr, cksum)): 2489 if (l4_proto != IPPROTO_SCTP) 2490 goto error; 2491 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2492 break; 2493 default: 2494 goto error; 2495 } 2496 } else { 2497 goto error; 2498 } 2499 } 2500 2501 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2502 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2503 case VIRTIO_NET_HDR_GSO_TCPV4: 2504 case VIRTIO_NET_HDR_GSO_TCPV6: 2505 if (l4_proto != IPPROTO_TCP) 2506 goto error; 2507 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2508 struct rte_tcp_hdr *, 2509 m->l2_len + m->l3_len); 2510 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2511 if (data_len < m->l2_len + m->l3_len + tcp_len) 2512 goto error; 2513 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2514 m->tso_segsz = hdr->gso_size; 2515 m->l4_len = tcp_len; 2516 break; 2517 case VIRTIO_NET_HDR_GSO_UDP: 2518 if (l4_proto != IPPROTO_UDP) 2519 goto error; 2520 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2521 m->tso_segsz = hdr->gso_size; 2522 m->l4_len = sizeof(struct rte_udp_hdr); 2523 break; 2524 default: 2525 VHOST_LOG_DATA(dev->ifname, WARNING, 2526 "unsupported gso type %u.\n", 2527 hdr->gso_type); 2528 goto error; 2529 } 2530 } 2531 return; 2532 2533 error: 2534 m->l2_len = 0; 2535 m->l3_len = 0; 2536 m->ol_flags = 0; 2537 } 2538 2539 static __rte_always_inline void 2540 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2541 struct rte_mbuf *m, bool legacy_ol_flags) 2542 { 2543 struct rte_net_hdr_lens hdr_lens; 2544 int l4_supported = 0; 2545 uint32_t ptype; 2546 2547 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2548 return; 2549 2550 if (legacy_ol_flags) { 2551 vhost_dequeue_offload_legacy(dev, hdr, m); 2552 return; 2553 } 2554 2555 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2556 2557 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2558 
m->packet_type = ptype; 2559 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2560 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2561 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2562 l4_supported = 1; 2563 2564 /* According to Virtio 1.1 spec, the device only needs to look at 2565 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2566 * This differs from the processing incoming packets path where the 2567 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2568 * device. 2569 * 2570 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2571 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2572 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2573 * 2574 * 5.1.6.2.2 Device Requirements: Packet Transmission 2575 * The device MUST ignore flag bits that it does not recognize. 2576 */ 2577 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2578 uint32_t hdrlen; 2579 2580 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2581 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2582 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2583 } else { 2584 /* Unknown proto or tunnel, do sw cksum. We can assume 2585 * the cksum field is in the first segment since the 2586 * buffers we provided to the host are large enough. 2587 * In case of SCTP, this will be wrong since it's a CRC 2588 * but there's nothing we can do. 2589 */ 2590 uint16_t csum = 0, off; 2591 2592 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2593 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2594 return; 2595 if (likely(csum != 0xffff)) 2596 csum = ~csum; 2597 off = hdr->csum_offset + hdr->csum_start; 2598 if (rte_pktmbuf_data_len(m) >= off + 1) 2599 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2600 } 2601 } 2602 2603 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2604 if (hdr->gso_size == 0) 2605 return; 2606 2607 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2608 case VIRTIO_NET_HDR_GSO_TCPV4: 2609 case VIRTIO_NET_HDR_GSO_TCPV6: 2610 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2611 break; 2612 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2613 m->tso_segsz = hdr->gso_size; 2614 break; 2615 case VIRTIO_NET_HDR_GSO_UDP: 2616 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2617 break; 2618 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2619 m->tso_segsz = hdr->gso_size; 2620 break; 2621 default: 2622 break; 2623 } 2624 } 2625 } 2626 2627 static __rte_noinline void 2628 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2629 struct buf_vector *buf_vec) 2630 { 2631 uint64_t len; 2632 uint64_t remain = sizeof(struct virtio_net_hdr); 2633 uint64_t src; 2634 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2635 2636 while (remain) { 2637 len = RTE_MIN(remain, buf_vec->buf_len); 2638 src = buf_vec->buf_addr; 2639 rte_memcpy((void *)(uintptr_t)dst, 2640 (void *)(uintptr_t)src, len); 2641 2642 remain -= len; 2643 dst += len; 2644 buf_vec++; 2645 } 2646 } 2647 2648 static __rte_always_inline int 2649 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2650 struct buf_vector *buf_vec, uint16_t nr_vec, 2651 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2652 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2653 { 2654 uint32_t buf_avail, buf_offset, buf_len; 2655 uint64_t buf_addr, buf_iova; 2656 uint32_t mbuf_avail, mbuf_offset; 2657 uint32_t hdr_remain = dev->vhost_hlen; 2658 uint32_t cpy_len; 2659 struct rte_mbuf *cur = m, *prev = m; 2660 struct virtio_net_hdr tmp_hdr; 2661 struct virtio_net_hdr *hdr = 
NULL; 2662 uint16_t vec_idx; 2663 struct vhost_async *async = vq->async; 2664 struct async_inflight_info *pkts_info; 2665 2666 /* 2667 * The caller has checked the descriptors chain is larger than the 2668 * header size. 2669 */ 2670 2671 if (virtio_net_with_host_offload(dev)) { 2672 if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { 2673 /* 2674 * No luck, the virtio-net header doesn't fit 2675 * in a contiguous virtual area. 2676 */ 2677 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2678 hdr = &tmp_hdr; 2679 } else { 2680 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); 2681 } 2682 } 2683 2684 for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { 2685 if (buf_vec[vec_idx].buf_len > hdr_remain) 2686 break; 2687 2688 hdr_remain -= buf_vec[vec_idx].buf_len; 2689 } 2690 2691 buf_addr = buf_vec[vec_idx].buf_addr; 2692 buf_iova = buf_vec[vec_idx].buf_iova; 2693 buf_len = buf_vec[vec_idx].buf_len; 2694 buf_offset = hdr_remain; 2695 buf_avail = buf_vec[vec_idx].buf_len - hdr_remain; 2696 2697 PRINT_PACKET(dev, 2698 (uintptr_t)(buf_addr + buf_offset), 2699 (uint32_t)buf_avail, 0); 2700 2701 mbuf_offset = 0; 2702 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2703 2704 if (is_async) { 2705 pkts_info = async->pkts_info; 2706 if (async_iter_initialize(dev, async)) 2707 return -1; 2708 } 2709 2710 while (1) { 2711 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2712 2713 if (is_async) { 2714 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2715 buf_iova + buf_offset, cpy_len, false) < 0) 2716 goto error; 2717 } else if (likely(hdr && cur == m)) { 2718 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), 2719 (void *)((uintptr_t)(buf_addr + buf_offset)), 2720 cpy_len); 2721 } else { 2722 sync_fill_seg(dev, vq, cur, mbuf_offset, 2723 buf_addr + buf_offset, 2724 buf_iova + buf_offset, cpy_len, false); 2725 } 2726 2727 mbuf_avail -= cpy_len; 2728 mbuf_offset += cpy_len; 2729 buf_avail -= cpy_len; 2730 buf_offset += cpy_len; 2731 2732 /* This buf reaches to its end, get the next one */ 2733 if (buf_avail == 0) { 2734 if (++vec_idx >= nr_vec) 2735 break; 2736 2737 buf_addr = buf_vec[vec_idx].buf_addr; 2738 buf_iova = buf_vec[vec_idx].buf_iova; 2739 buf_len = buf_vec[vec_idx].buf_len; 2740 2741 buf_offset = 0; 2742 buf_avail = buf_len; 2743 2744 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2745 (uint32_t)buf_avail, 0); 2746 } 2747 2748 /* 2749 * This mbuf reaches to its end, get a new one 2750 * to hold more data. 
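		 * The new segment is linked after the current one, and the
		 * head mbuf's nb_segs and pkt_len are updated accordingly.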
2751 */ 2752 if (mbuf_avail == 0) { 2753 cur = rte_pktmbuf_alloc(mbuf_pool); 2754 if (unlikely(cur == NULL)) { 2755 VHOST_LOG_DATA(dev->ifname, ERR, 2756 "failed to allocate memory for mbuf.\n"); 2757 goto error; 2758 } 2759 2760 prev->next = cur; 2761 prev->data_len = mbuf_offset; 2762 m->nb_segs += 1; 2763 m->pkt_len += mbuf_offset; 2764 prev = cur; 2765 2766 mbuf_offset = 0; 2767 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2768 } 2769 } 2770 2771 prev->data_len = mbuf_offset; 2772 m->pkt_len += mbuf_offset; 2773 2774 if (is_async) { 2775 async_iter_finalize(async); 2776 if (hdr) 2777 pkts_info[slot_idx].nethdr = *hdr; 2778 } else if (hdr) { 2779 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 2780 } 2781 2782 return 0; 2783 error: 2784 if (is_async) 2785 async_iter_cancel(async); 2786 2787 return -1; 2788 } 2789 2790 static void 2791 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2792 { 2793 rte_free(opaque); 2794 } 2795 2796 static int 2797 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 2798 { 2799 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2800 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2801 uint16_t buf_len; 2802 rte_iova_t iova; 2803 void *buf; 2804 2805 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2806 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2807 2808 if (unlikely(total_len > UINT16_MAX)) 2809 return -ENOSPC; 2810 2811 buf_len = total_len; 2812 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2813 if (unlikely(buf == NULL)) 2814 return -ENOMEM; 2815 2816 /* Initialize shinfo */ 2817 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2818 virtio_dev_extbuf_free, buf); 2819 if (unlikely(shinfo == NULL)) { 2820 rte_free(buf); 2821 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n"); 2822 return -1; 2823 } 2824 2825 iova = rte_malloc_virt2iova(buf); 2826 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2827 rte_pktmbuf_reset_headroom(pkt); 2828 2829 return 0; 2830 } 2831 2832 /* 2833 * Prepare a host supported pktmbuf. 2834 */ 2835 static __rte_always_inline int 2836 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2837 uint32_t data_len) 2838 { 2839 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2840 return 0; 2841 2842 /* attach an external buffer if supported */ 2843 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 2844 return 0; 2845 2846 /* check if chained buffers are allowed */ 2847 if (!dev->linearbuf) 2848 return 0; 2849 2850 return -1; 2851 } 2852 2853 __rte_always_inline 2854 static uint16_t 2855 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2856 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2857 bool legacy_ol_flags) 2858 { 2859 uint16_t i; 2860 uint16_t avail_entries; 2861 uint16_t dropped = 0; 2862 static bool allocerr_warned; 2863 2864 /* 2865 * The ordering between avail index and 2866 * desc reads needs to be enforced. 
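	 * The __ATOMIC_ACQUIRE load of avail->idx below pairs with the
	 * guest driver's store of the available index, so descriptor
	 * entries read afterwards are at least as new as the observed index.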
2867 */ 2868 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2869 vq->last_avail_idx; 2870 if (avail_entries == 0) 2871 return 0; 2872 2873 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2874 2875 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2876 2877 count = RTE_MIN(count, MAX_PKT_BURST); 2878 count = RTE_MIN(count, avail_entries); 2879 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 2880 2881 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2882 return 0; 2883 2884 for (i = 0; i < count; i++) { 2885 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2886 uint16_t head_idx; 2887 uint32_t buf_len; 2888 uint16_t nr_vec = 0; 2889 int err; 2890 2891 if (unlikely(fill_vec_buf_split(dev, vq, 2892 vq->last_avail_idx + i, 2893 &nr_vec, buf_vec, 2894 &head_idx, &buf_len, 2895 VHOST_ACCESS_RO) < 0)) 2896 break; 2897 2898 update_shadow_used_ring_split(vq, head_idx, 0); 2899 2900 if (unlikely(buf_len <= dev->vhost_hlen)) { 2901 dropped += 1; 2902 i++; 2903 break; 2904 } 2905 2906 buf_len -= dev->vhost_hlen; 2907 2908 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2909 if (unlikely(err)) { 2910 /* 2911 * mbuf allocation fails for jumbo packets when external 2912 * buffer allocation is not allowed and linear buffer 2913 * is required. Drop this packet. 2914 */ 2915 if (!allocerr_warned) { 2916 VHOST_LOG_DATA(dev->ifname, ERR, 2917 "failed mbuf alloc of size %d from %s.\n", 2918 buf_len, mbuf_pool->name); 2919 allocerr_warned = true; 2920 } 2921 dropped += 1; 2922 i++; 2923 break; 2924 } 2925 2926 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2927 mbuf_pool, legacy_ol_flags, 0, false); 2928 if (unlikely(err)) { 2929 if (!allocerr_warned) { 2930 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 2931 allocerr_warned = true; 2932 } 2933 dropped += 1; 2934 i++; 2935 break; 2936 } 2937 2938 } 2939 2940 if (dropped) 2941 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2942 2943 vq->last_avail_idx += i; 2944 2945 do_data_copy_dequeue(vq); 2946 if (unlikely(i < count)) 2947 vq->shadow_used_idx = i; 2948 if (likely(vq->shadow_used_idx)) { 2949 flush_shadow_used_ring_split(dev, vq); 2950 vhost_vring_call_split(dev, vq); 2951 } 2952 2953 return (i - dropped); 2954 } 2955 2956 __rte_noinline 2957 static uint16_t 2958 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2959 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2960 struct rte_mbuf **pkts, uint16_t count) 2961 { 2962 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2963 } 2964 2965 __rte_noinline 2966 static uint16_t 2967 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2968 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2969 struct rte_mbuf **pkts, uint16_t count) 2970 { 2971 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2972 } 2973 2974 static __rte_always_inline int 2975 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2976 struct vhost_virtqueue *vq, 2977 struct rte_mbuf **pkts, 2978 uint16_t avail_idx, 2979 uintptr_t *desc_addrs, 2980 uint16_t *ids) 2981 { 2982 bool wrap = vq->avail_wrap_counter; 2983 struct vring_packed_desc *descs = vq->desc_packed; 2984 uint64_t lens[PACKED_BATCH_SIZE]; 2985 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2986 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2987 uint16_t flags, i; 2988 2989 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2990 return -1; 2991 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2992 return 
-1; 2993 2994 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2995 flags = descs[avail_idx + i].flags; 2996 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2997 (wrap == !!(flags & VRING_DESC_F_USED)) || 2998 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2999 return -1; 3000 } 3001 3002 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3003 3004 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3005 lens[i] = descs[avail_idx + i].len; 3006 3007 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3008 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3009 descs[avail_idx + i].addr, 3010 &lens[i], VHOST_ACCESS_RW); 3011 } 3012 3013 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3014 if (unlikely(!desc_addrs[i])) 3015 return -1; 3016 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3017 return -1; 3018 } 3019 3020 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3021 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3022 goto err; 3023 } 3024 3025 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3026 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3027 3028 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3029 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3030 goto err; 3031 } 3032 3033 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3034 pkts[i]->pkt_len = lens[i] - buf_offset; 3035 pkts[i]->data_len = pkts[i]->pkt_len; 3036 ids[i] = descs[avail_idx + i].id; 3037 } 3038 3039 return 0; 3040 3041 err: 3042 return -1; 3043 } 3044 3045 static __rte_always_inline int 3046 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3047 struct vhost_virtqueue *vq, 3048 struct rte_mbuf **pkts, 3049 bool legacy_ol_flags) 3050 { 3051 uint16_t avail_idx = vq->last_avail_idx; 3052 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3053 struct virtio_net_hdr *hdr; 3054 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3055 uint16_t ids[PACKED_BATCH_SIZE]; 3056 uint16_t i; 3057 3058 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3059 desc_addrs, ids)) 3060 return -1; 3061 3062 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3063 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3064 3065 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3066 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3067 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3068 pkts[i]->pkt_len); 3069 3070 if (virtio_net_with_host_offload(dev)) { 3071 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3072 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3073 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3074 } 3075 } 3076 3077 if (virtio_net_is_inorder(dev)) 3078 vhost_shadow_dequeue_batch_packed_inorder(vq, 3079 ids[PACKED_BATCH_SIZE - 1]); 3080 else 3081 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3082 3083 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3084 3085 return 0; 3086 } 3087 3088 static __rte_always_inline int 3089 vhost_dequeue_single_packed(struct virtio_net *dev, 3090 struct vhost_virtqueue *vq, 3091 struct rte_mempool *mbuf_pool, 3092 struct rte_mbuf *pkts, 3093 uint16_t *buf_id, 3094 uint16_t *desc_count, 3095 bool legacy_ol_flags) 3096 { 3097 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3098 uint32_t buf_len; 3099 uint16_t nr_vec = 0; 3100 int err; 3101 static bool allocerr_warned; 3102 3103 if (unlikely(fill_vec_buf_packed(dev, vq, 3104 vq->last_avail_idx, desc_count, 3105 buf_vec, &nr_vec, 3106 buf_id, &buf_len, 3107 VHOST_ACCESS_RO) < 0)) 3108 return -1; 3109 3110 if (unlikely(buf_len <= dev->vhost_hlen)) 3111 return -1; 3112 3113 buf_len 
-= dev->vhost_hlen; 3114 3115 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3116 if (!allocerr_warned) { 3117 VHOST_LOG_DATA(dev->ifname, ERR, 3118 "failed mbuf alloc of size %d from %s.\n", 3119 buf_len, mbuf_pool->name); 3120 allocerr_warned = true; 3121 } 3122 return -1; 3123 } 3124 3125 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3126 mbuf_pool, legacy_ol_flags, 0, false); 3127 if (unlikely(err)) { 3128 if (!allocerr_warned) { 3129 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3130 allocerr_warned = true; 3131 } 3132 return -1; 3133 } 3134 3135 return 0; 3136 } 3137 3138 static __rte_always_inline int 3139 virtio_dev_tx_single_packed(struct virtio_net *dev, 3140 struct vhost_virtqueue *vq, 3141 struct rte_mempool *mbuf_pool, 3142 struct rte_mbuf *pkts, 3143 bool legacy_ol_flags) 3144 { 3145 3146 uint16_t buf_id, desc_count = 0; 3147 int ret; 3148 3149 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3150 &desc_count, legacy_ol_flags); 3151 3152 if (likely(desc_count > 0)) { 3153 if (virtio_net_is_inorder(dev)) 3154 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3155 desc_count); 3156 else 3157 vhost_shadow_dequeue_single_packed(vq, buf_id, 3158 desc_count); 3159 3160 vq_inc_last_avail_packed(vq, desc_count); 3161 } 3162 3163 return ret; 3164 } 3165 3166 __rte_always_inline 3167 static uint16_t 3168 virtio_dev_tx_packed(struct virtio_net *dev, 3169 struct vhost_virtqueue *__rte_restrict vq, 3170 struct rte_mempool *mbuf_pool, 3171 struct rte_mbuf **__rte_restrict pkts, 3172 uint32_t count, 3173 bool legacy_ol_flags) 3174 { 3175 uint32_t pkt_idx = 0; 3176 3177 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3178 return 0; 3179 3180 do { 3181 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3182 3183 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3184 if (!virtio_dev_tx_batch_packed(dev, vq, 3185 &pkts[pkt_idx], 3186 legacy_ol_flags)) { 3187 pkt_idx += PACKED_BATCH_SIZE; 3188 continue; 3189 } 3190 } 3191 3192 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3193 pkts[pkt_idx], 3194 legacy_ol_flags)) 3195 break; 3196 pkt_idx++; 3197 } while (pkt_idx < count); 3198 3199 if (pkt_idx != count) 3200 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3201 3202 if (vq->shadow_used_idx) { 3203 do_data_copy_dequeue(vq); 3204 3205 vhost_flush_dequeue_shadow_packed(dev, vq); 3206 vhost_vring_call_packed(dev, vq); 3207 } 3208 3209 return pkt_idx; 3210 } 3211 3212 __rte_noinline 3213 static uint16_t 3214 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3215 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3216 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3217 { 3218 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3219 } 3220 3221 __rte_noinline 3222 static uint16_t 3223 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3224 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3225 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3226 { 3227 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3228 } 3229 3230 uint16_t 3231 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3232 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3233 { 3234 struct virtio_net *dev; 3235 struct rte_mbuf *rarp_mbuf = NULL; 3236 struct vhost_virtqueue *vq; 3237 int16_t success = 1; 3238 3239 dev = get_device(vid); 3240 if (!dev) 3241 return 0; 3242 3243 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 
		VHOST_LOG_DATA(dev->ifname, ERR,
			"%s: built-in vhost net backend is disabled.\n",
			__func__);
		return 0;
	}

	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(dev->ifname, ERR,
			"%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(!vq->enabled)) {
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, to make it look like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write if it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it at the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
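		 * The injected mbuf takes one slot of the caller's burst:
		 * count is decremented here and added back before returning.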
3305 */ 3306 pkts[0] = rarp_mbuf; 3307 vhost_queue_stats_update(dev, vq, pkts, 1); 3308 pkts++; 3309 count -= 1; 3310 } 3311 3312 if (vq_is_packed(dev)) { 3313 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3314 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3315 else 3316 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3317 } else { 3318 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3319 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3320 else 3321 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3322 } 3323 3324 vhost_queue_stats_update(dev, vq, pkts, count); 3325 3326 out: 3327 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3328 vhost_user_iotlb_rd_unlock(vq); 3329 3330 out_access_unlock: 3331 rte_spinlock_unlock(&vq->access_lock); 3332 3333 if (unlikely(rarp_mbuf != NULL)) 3334 count += 1; 3335 3336 return count; 3337 } 3338 3339 static __rte_always_inline uint16_t 3340 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3341 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3342 uint16_t vchan_id, bool legacy_ol_flags) 3343 { 3344 uint16_t start_idx, from, i; 3345 uint16_t nr_cpl_pkts = 0; 3346 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3347 3348 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3349 3350 start_idx = async_get_first_inflight_pkt_idx(vq); 3351 3352 from = start_idx; 3353 while (vq->async->pkts_cmpl_flag[from] && count--) { 3354 vq->async->pkts_cmpl_flag[from] = false; 3355 from = (from + 1) % vq->size; 3356 nr_cpl_pkts++; 3357 } 3358 3359 if (nr_cpl_pkts == 0) 3360 return 0; 3361 3362 for (i = 0; i < nr_cpl_pkts; i++) { 3363 from = (start_idx + i) % vq->size; 3364 pkts[i] = pkts_info[from].mbuf; 3365 3366 if (virtio_net_with_host_offload(dev)) 3367 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i], 3368 legacy_ol_flags); 3369 } 3370 3371 /* write back completed descs to used ring and update used idx */ 3372 if (vq_is_packed(dev)) { 3373 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3374 vhost_vring_call_packed(dev, vq); 3375 } else { 3376 write_back_completed_descs_split(vq, nr_cpl_pkts); 3377 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE); 3378 vhost_vring_call_split(dev, vq); 3379 } 3380 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3381 3382 return nr_cpl_pkts; 3383 } 3384 3385 static __rte_always_inline uint16_t 3386 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3387 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3388 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3389 { 3390 static bool allocerr_warned; 3391 bool dropped = false; 3392 uint16_t avail_entries; 3393 uint16_t pkt_idx, slot_idx = 0; 3394 uint16_t nr_done_pkts = 0; 3395 uint16_t pkt_err = 0; 3396 uint16_t n_xfer; 3397 struct vhost_async *async = vq->async; 3398 struct async_inflight_info *pkts_info = async->pkts_info; 3399 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3400 uint16_t pkts_size = count; 3401 3402 /** 3403 * The ordering between avail index and 3404 * desc reads needs to be enforced. 
3405 */ 3406 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3407 vq->last_avail_idx; 3408 if (avail_entries == 0) 3409 goto out; 3410 3411 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3412 3413 async_iter_reset(async); 3414 3415 count = RTE_MIN(count, MAX_PKT_BURST); 3416 count = RTE_MIN(count, avail_entries); 3417 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3418 3419 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3420 goto out; 3421 3422 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3423 uint16_t head_idx = 0; 3424 uint16_t nr_vec = 0; 3425 uint16_t to; 3426 uint32_t buf_len; 3427 int err; 3428 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3429 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3430 3431 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3432 &nr_vec, buf_vec, 3433 &head_idx, &buf_len, 3434 VHOST_ACCESS_RO) < 0)) { 3435 dropped = true; 3436 break; 3437 } 3438 3439 if (unlikely(buf_len <= dev->vhost_hlen)) { 3440 dropped = true; 3441 break; 3442 } 3443 3444 buf_len -= dev->vhost_hlen; 3445 3446 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3447 if (unlikely(err)) { 3448 /** 3449 * mbuf allocation fails for jumbo packets when external 3450 * buffer allocation is not allowed and linear buffer 3451 * is required. Drop this packet. 3452 */ 3453 if (!allocerr_warned) { 3454 VHOST_LOG_DATA(dev->ifname, ERR, 3455 "%s: Failed mbuf alloc of size %d from %s\n", 3456 __func__, buf_len, mbuf_pool->name); 3457 allocerr_warned = true; 3458 } 3459 dropped = true; 3460 break; 3461 } 3462 3463 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3464 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3465 legacy_ol_flags, slot_idx, true); 3466 if (unlikely(err)) { 3467 if (!allocerr_warned) { 3468 VHOST_LOG_DATA(dev->ifname, ERR, 3469 "%s: Failed to offload copies to async channel.\n", 3470 __func__); 3471 allocerr_warned = true; 3472 } 3473 dropped = true; 3474 break; 3475 } 3476 3477 pkts_info[slot_idx].mbuf = pkt; 3478 3479 /* store used descs */ 3480 to = async->desc_idx_split & (vq->size - 1); 3481 async->descs_split[to].id = head_idx; 3482 async->descs_split[to].len = 0; 3483 async->desc_idx_split++; 3484 3485 vq->last_avail_idx++; 3486 } 3487 3488 if (unlikely(dropped)) 3489 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3490 3491 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3492 async->iov_iter, pkt_idx); 3493 3494 async->pkts_inflight_n += n_xfer; 3495 3496 pkt_err = pkt_idx - n_xfer; 3497 if (unlikely(pkt_err)) { 3498 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n", 3499 __func__); 3500 3501 pkt_idx = n_xfer; 3502 /* recover available ring */ 3503 vq->last_avail_idx -= pkt_err; 3504 3505 /** 3506 * recover async channel copy related structures and free pktmbufs 3507 * for error pkts. 3508 */ 3509 async->desc_idx_split -= pkt_err; 3510 while (pkt_err-- > 0) { 3511 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3512 slot_idx--; 3513 } 3514 } 3515 3516 async->pkts_idx += pkt_idx; 3517 if (async->pkts_idx >= vq->size) 3518 async->pkts_idx -= vq->size; 3519 3520 out: 3521 /* DMA device may serve other queues, unconditionally check completed. 
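	 * This is also why the early-exit paths above jump to this label
	 * instead of returning 0 right away.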
*/ 3522 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3523 dma_id, vchan_id, legacy_ol_flags); 3524 3525 return nr_done_pkts; 3526 } 3527 3528 __rte_noinline 3529 static uint16_t 3530 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3531 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3532 struct rte_mbuf **pkts, uint16_t count, 3533 int16_t dma_id, uint16_t vchan_id) 3534 { 3535 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3536 pkts, count, dma_id, vchan_id, true); 3537 } 3538 3539 __rte_noinline 3540 static uint16_t 3541 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3542 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3543 struct rte_mbuf **pkts, uint16_t count, 3544 int16_t dma_id, uint16_t vchan_id) 3545 { 3546 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3547 pkts, count, dma_id, vchan_id, false); 3548 } 3549 3550 static __rte_always_inline void 3551 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t buf_id) 3552 { 3553 struct vhost_async *async = vq->async; 3554 uint16_t idx = async->buffer_idx_packed; 3555 3556 async->buffers_packed[idx].id = buf_id; 3557 async->buffers_packed[idx].len = 0; 3558 async->buffers_packed[idx].count = 1; 3559 3560 async->buffer_idx_packed++; 3561 if (async->buffer_idx_packed >= vq->size) 3562 async->buffer_idx_packed -= vq->size; 3563 3564 } 3565 3566 static __rte_always_inline int 3567 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3568 struct vhost_virtqueue *vq, 3569 struct rte_mempool *mbuf_pool, 3570 struct rte_mbuf *pkts, 3571 uint16_t slot_idx, 3572 bool legacy_ol_flags) 3573 { 3574 int err; 3575 uint16_t buf_id, desc_count = 0; 3576 uint16_t nr_vec = 0; 3577 uint32_t buf_len; 3578 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3579 static bool allocerr_warned; 3580 3581 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3582 buf_vec, &nr_vec, &buf_id, &buf_len, 3583 VHOST_ACCESS_RO) < 0)) 3584 return -1; 3585 3586 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3587 if (!allocerr_warned) { 3588 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n", 3589 buf_len, mbuf_pool->name); 3590 3591 allocerr_warned = true; 3592 } 3593 return -1; 3594 } 3595 3596 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3597 legacy_ol_flags, slot_idx, true); 3598 if (unlikely(err)) { 3599 rte_pktmbuf_free(pkts); 3600 if (!allocerr_warned) { 3601 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf on.\n"); 3602 allocerr_warned = true; 3603 } 3604 return -1; 3605 } 3606 3607 /* update async shadow packed ring */ 3608 vhost_async_shadow_dequeue_single_packed(vq, buf_id); 3609 3610 return err; 3611 } 3612 3613 static __rte_always_inline uint16_t 3614 virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3615 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3616 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3617 { 3618 uint16_t pkt_idx; 3619 uint16_t slot_idx = 0; 3620 uint16_t nr_done_pkts = 0; 3621 uint16_t pkt_err = 0; 3622 uint32_t n_xfer; 3623 struct vhost_async *async = vq->async; 3624 struct async_inflight_info *pkts_info = async->pkts_info; 3625 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3626 3627 VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); 3628 3629 async_iter_reset(async); 3630 3631 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3632 goto out; 
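	/*
	 * Each dequeued packet is tracked in async->pkts_info by slot index;
	 * the completion flag written for that slot later tells
	 * async_poll_dequeue_completed() which mbuf to hand back.
	 */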

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];

		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

		slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
		if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt,
				slot_idx, legacy_ol_flags))) {
			rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
			break;
		}

		pkts_info[slot_idx].mbuf = pkt;

		vq_inc_last_avail_packed(vq, 1);
	}

	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
			async->iov_iter, pkt_idx);

	async->pkts_inflight_n += n_xfer;

	pkt_err = pkt_idx - n_xfer;

	if (unlikely(pkt_err)) {
		pkt_idx -= pkt_err;

		/*
		 * Recover DMA-copy related structures for the error packets.
		 */
		if (async->buffer_idx_packed >= pkt_err)
			async->buffer_idx_packed -= pkt_err;
		else
			async->buffer_idx_packed += vq->size - pkt_err;

		/*
		 * Recover the available ring before pkt_err is consumed by
		 * the mbuf-freeing loop below.
		 */
		if (vq->last_avail_idx >= pkt_err) {
			vq->last_avail_idx -= pkt_err;
		} else {
			vq->last_avail_idx += vq->size - pkt_err;
			vq->avail_wrap_counter ^= 1;
		}

		/* free pktmbufs of the error packets */
		while (pkt_err-- > 0) {
			rte_pktmbuf_free(pkts_info[slot_idx % vq->size].mbuf);
			slot_idx--;
		}
	}

	async->pkts_idx += pkt_idx;
	if (async->pkts_idx >= vq->size)
		async->pkts_idx -= vq->size;

out:
	nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count,
			dma_id, vchan_id, legacy_ol_flags);

	return nr_done_pkts;
}

__rte_noinline
static uint16_t
virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
{
	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
			pkts, count, dma_id, vchan_id, true);
}

__rte_noinline
static uint16_t
virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
{
	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
			pkts, count, dma_id, vchan_id, false);
}

uint16_t
rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
	int *nr_inflight, int16_t dma_id, uint16_t vchan_id)
{
	struct virtio_net *dev;
	struct rte_mbuf *rarp_mbuf = NULL;
	struct vhost_virtqueue *vq;
	int16_t success = 1;

	dev = get_device(vid);
	if (!dev || !nr_inflight)
		return 0;

	*nr_inflight = -1;

	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n",
			__func__);
		return 0;
	}

	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
			__func__, dma_id);
		return 0;
	}

	if (unlikely(!dma_copy_track[dma_id].vchans ||
			!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
			__func__, dma_id, vchan_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(vq->enabled == 0)) {
		count = 0;
		goto out_access_unlock;
	}

	if (unlikely(!vq->async)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
			__func__, queue_id);
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(vq->access_ok == 0))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, to make it look like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write if it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it at the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		pkts[0] = rarp_mbuf;
		vhost_queue_stats_update(dev, vq, pkts, 1);
		pkts++;
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	}

	*nr_inflight = vq->async->pkts_inflight_n;
	vhost_queue_stats_update(dev, vq, pkts, count);

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL))
		count += 1;

	return count;
}
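
/*
 * Usage sketch (illustrative only, not part of this file): one way an
 * application might drive the async data path implemented above. The names
 * "vid", "virtio_rxq", "virtio_txq", "burst", "done", "mbuf_pool", "dma_id"
 * and "vchan_id" are assumptions for the example; the DMA channel is expected
 * to have been set up with rte_vhost_async_dma_configure() and the queues
 * registered with rte_vhost_async_channel_register() beforehand.
 *
 *	struct rte_mbuf *burst[MAX_PKT_BURST];
 *	struct rte_mbuf *done[MAX_PKT_BURST];
 *	int nr_inflight;
 *	uint16_t n_enq, n_cpl, n_deq;
 *
 *	// Submit enqueue copies; mbufs stay owned by vhost until completed.
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, virtio_rxq, burst,
 *			MAX_PKT_BURST, dma_id, vchan_id);
 *
 *	// Poll for completed enqueue copies and release the mbufs.
 *	n_cpl = rte_vhost_poll_enqueue_completed(vid, virtio_rxq, done,
 *			MAX_PKT_BURST, dma_id, vchan_id);
 *	rte_pktmbuf_free_bulk(done, n_cpl);
 *
 *	// Asynchronous dequeue: returned mbufs are ready for the application.
 *	n_deq = rte_vhost_async_try_dequeue_burst(vid, virtio_txq, mbuf_pool,
 *			done, MAX_PKT_BURST, &nr_inflight, dma_id, vchan_id);
 */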