1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_dmadev.h> 15 #include <rte_vhost.h> 16 #include <rte_tcp.h> 17 #include <rte_udp.h> 18 #include <rte_sctp.h> 19 #include <rte_arp.h> 20 #include <rte_spinlock.h> 21 #include <rte_malloc.h> 22 #include <rte_vhost_async.h> 23 24 #include "iotlb.h" 25 #include "vhost.h" 26 27 #define MAX_BATCH_LEN 256 28 29 static __rte_always_inline uint16_t 30 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 31 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 32 uint16_t vchan_id, bool legacy_ol_flags); 33 34 /* DMA device copy operation tracking array. */ 35 struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX]; 36 37 static __rte_always_inline bool 38 rxvq_is_mergeable(struct virtio_net *dev) 39 { 40 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 41 } 42 43 static __rte_always_inline bool 44 virtio_net_is_inorder(struct virtio_net *dev) 45 { 46 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 47 } 48 49 static bool 50 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 51 { 52 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 53 } 54 55 /* 56 * This function must be called with virtqueue's access_lock taken. 57 */ 58 static inline void 59 vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq, 60 struct rte_mbuf **pkts, uint16_t count) 61 { 62 struct virtqueue_stats *stats = &vq->stats; 63 int i; 64 65 if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED)) 66 return; 67 68 for (i = 0; i < count; i++) { 69 struct rte_ether_addr *ea; 70 struct rte_mbuf *pkt = pkts[i]; 71 uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt); 72 73 stats->packets++; 74 stats->bytes += pkt_len; 75 76 if (pkt_len == 64) { 77 stats->size_bins[1]++; 78 } else if (pkt_len > 64 && pkt_len < 1024) { 79 uint32_t bin; 80 81 /* count zeros, and offset into correct bin */ 82 bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5; 83 stats->size_bins[bin]++; 84 } else { 85 if (pkt_len < 64) 86 stats->size_bins[0]++; 87 else if (pkt_len < 1519) 88 stats->size_bins[6]++; 89 else 90 stats->size_bins[7]++; 91 } 92 93 ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *); 94 if (rte_is_multicast_ether_addr(ea)) { 95 if (rte_is_broadcast_ether_addr(ea)) 96 stats->broadcast++; 97 else 98 stats->multicast++; 99 } 100 } 101 } 102 103 static __rte_always_inline int64_t 104 vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, 105 int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, 106 struct vhost_iov_iter *pkt) 107 { 108 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 109 uint16_t ring_mask = dma_info->ring_mask; 110 static bool vhost_async_dma_copy_log; 111 112 113 struct vhost_iovec *iov = pkt->iov; 114 int copy_idx = 0; 115 uint32_t nr_segs = pkt->nr_segs; 116 uint16_t i; 117 118 if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs) 119 return -1; 120 121 for (i = 0; i < nr_segs; i++) { 122 copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr, 123 (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); 124 /** 125 * Since all memory is pinned and DMA vChannel 126 * ring has enough space, failure should be a 127 * rare case. 
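		 * The rte_dma_burst_capacity() check above already
		 * guaranteed ring space for all nr_segs copies.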
If failure happens, it means DMA 128 * device encounters serious errors; in this 129 * case, please stop async data-path and check 130 * what has happened to DMA device. 131 */ 132 if (unlikely(copy_idx < 0)) { 133 if (!vhost_async_dma_copy_log) { 134 VHOST_LOG_DATA(dev->ifname, ERR, 135 "DMA copy failed for channel %d:%u\n", 136 dma_id, vchan_id); 137 vhost_async_dma_copy_log = true; 138 } 139 return -1; 140 } 141 } 142 143 /** 144 * Only store packet completion flag address in the last copy's 145 * slot, and other slots are set to NULL. 146 */ 147 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx]; 148 149 return nr_segs; 150 } 151 152 static __rte_always_inline uint16_t 153 vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, 154 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, 155 struct vhost_iov_iter *pkts, uint16_t nr_pkts) 156 { 157 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 158 int64_t ret, nr_copies = 0; 159 uint16_t pkt_idx; 160 161 rte_spinlock_lock(&dma_info->dma_lock); 162 163 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { 164 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, 165 &pkts[pkt_idx]); 166 if (unlikely(ret < 0)) 167 break; 168 169 nr_copies += ret; 170 head_idx++; 171 if (head_idx >= vq->size) 172 head_idx -= vq->size; 173 } 174 175 if (likely(nr_copies > 0)) 176 rte_dma_submit(dma_id, vchan_id); 177 178 rte_spinlock_unlock(&dma_info->dma_lock); 179 180 return pkt_idx; 181 } 182 183 static __rte_always_inline uint16_t 184 vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id, 185 uint16_t max_pkts) 186 { 187 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 188 uint16_t ring_mask = dma_info->ring_mask; 189 uint16_t last_idx = 0; 190 uint16_t nr_copies; 191 uint16_t copy_idx; 192 uint16_t i; 193 bool has_error = false; 194 static bool vhost_async_dma_complete_log; 195 196 rte_spinlock_lock(&dma_info->dma_lock); 197 198 /** 199 * Print error log for debugging, if DMA reports error during 200 * DMA transfer. We do not handle error in vhost level. 201 */ 202 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error); 203 if (unlikely(!vhost_async_dma_complete_log && has_error)) { 204 VHOST_LOG_DATA(dev->ifname, ERR, 205 "DMA completion failure on channel %d:%u\n", 206 dma_id, vchan_id); 207 vhost_async_dma_complete_log = true; 208 } else if (nr_copies == 0) { 209 goto out; 210 } 211 212 copy_idx = last_idx - nr_copies + 1; 213 for (i = 0; i < nr_copies; i++) { 214 bool *flag; 215 216 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask]; 217 if (flag) { 218 /** 219 * Mark the packet flag as received. The flag 220 * could belong to another virtqueue but write 221 * is atomic. 
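			 * A DMA vChannel may be shared by multiple
			 * virtqueues, which is why the flag may point
			 * into another virtqueue's pkts_cmpl_flag array.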
222 */ 223 *flag = true; 224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 225 } 226 copy_idx++; 227 } 228 229 out: 230 rte_spinlock_unlock(&dma_info->dma_lock); 231 return nr_copies; 232 } 233 234 static inline void 235 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 236 { 237 struct batch_copy_elem *elem = vq->batch_copy_elems; 238 uint16_t count = vq->batch_copy_nb_elems; 239 int i; 240 241 for (i = 0; i < count; i++) { 242 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 243 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 244 elem[i].len); 245 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 246 } 247 248 vq->batch_copy_nb_elems = 0; 249 } 250 251 static inline void 252 do_data_copy_dequeue(struct vhost_virtqueue *vq) 253 { 254 struct batch_copy_elem *elem = vq->batch_copy_elems; 255 uint16_t count = vq->batch_copy_nb_elems; 256 int i; 257 258 for (i = 0; i < count; i++) 259 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 260 261 vq->batch_copy_nb_elems = 0; 262 } 263 264 static __rte_always_inline void 265 do_flush_shadow_used_ring_split(struct virtio_net *dev, 266 struct vhost_virtqueue *vq, 267 uint16_t to, uint16_t from, uint16_t size) 268 { 269 rte_memcpy(&vq->used->ring[to], 270 &vq->shadow_used_split[from], 271 size * sizeof(struct vring_used_elem)); 272 vhost_log_cache_used_vring(dev, vq, 273 offsetof(struct vring_used, ring[to]), 274 size * sizeof(struct vring_used_elem)); 275 } 276 277 static __rte_always_inline void 278 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 279 { 280 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 281 282 if (used_idx + vq->shadow_used_idx <= vq->size) { 283 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 284 vq->shadow_used_idx); 285 } else { 286 uint16_t size; 287 288 /* update used ring interval [used_idx, vq->size] */ 289 size = vq->size - used_idx; 290 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 291 292 /* update the left half used ring interval [0, left_size] */ 293 do_flush_shadow_used_ring_split(dev, vq, 0, size, 294 vq->shadow_used_idx - size); 295 } 296 vq->last_used_idx += vq->shadow_used_idx; 297 298 vhost_log_cache_sync(dev, vq); 299 300 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 301 __ATOMIC_RELEASE); 302 vq->shadow_used_idx = 0; 303 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 304 sizeof(vq->used->idx)); 305 } 306 307 static __rte_always_inline void 308 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 309 uint16_t desc_idx, uint32_t len) 310 { 311 uint16_t i = vq->shadow_used_idx++; 312 313 vq->shadow_used_split[i].id = desc_idx; 314 vq->shadow_used_split[i].len = len; 315 } 316 317 static __rte_always_inline void 318 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 319 struct vhost_virtqueue *vq) 320 { 321 int i; 322 uint16_t used_idx = vq->last_used_idx; 323 uint16_t head_idx = vq->last_used_idx; 324 uint16_t head_flags = 0; 325 326 /* Split loop in two to save memory barriers */ 327 for (i = 0; i < vq->shadow_used_idx; i++) { 328 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 329 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 330 331 used_idx += vq->shadow_used_packed[i].count; 332 if (used_idx >= vq->size) 333 used_idx -= vq->size; 334 } 335 336 /* The ordering for storing desc flags needs to be enforced. 
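	 * The id/len stores above must be visible to the driver before
	 * the flags stores below; the release fence provides that order.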
*/ 337 rte_atomic_thread_fence(__ATOMIC_RELEASE); 338 339 for (i = 0; i < vq->shadow_used_idx; i++) { 340 uint16_t flags; 341 342 if (vq->shadow_used_packed[i].len) 343 flags = VRING_DESC_F_WRITE; 344 else 345 flags = 0; 346 347 if (vq->used_wrap_counter) { 348 flags |= VRING_DESC_F_USED; 349 flags |= VRING_DESC_F_AVAIL; 350 } else { 351 flags &= ~VRING_DESC_F_USED; 352 flags &= ~VRING_DESC_F_AVAIL; 353 } 354 355 if (i > 0) { 356 vq->desc_packed[vq->last_used_idx].flags = flags; 357 358 vhost_log_cache_used_vring(dev, vq, 359 vq->last_used_idx * 360 sizeof(struct vring_packed_desc), 361 sizeof(struct vring_packed_desc)); 362 } else { 363 head_idx = vq->last_used_idx; 364 head_flags = flags; 365 } 366 367 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 368 } 369 370 vq->desc_packed[head_idx].flags = head_flags; 371 372 vhost_log_cache_used_vring(dev, vq, 373 head_idx * 374 sizeof(struct vring_packed_desc), 375 sizeof(struct vring_packed_desc)); 376 377 vq->shadow_used_idx = 0; 378 vhost_log_cache_sync(dev, vq); 379 } 380 381 static __rte_always_inline void 382 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 383 struct vhost_virtqueue *vq) 384 { 385 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 386 387 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 388 /* desc flags is the synchronization point for virtio packed vring */ 389 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 390 used_elem->flags, __ATOMIC_RELEASE); 391 392 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 393 sizeof(struct vring_packed_desc), 394 sizeof(struct vring_packed_desc)); 395 vq->shadow_used_idx = 0; 396 vhost_log_cache_sync(dev, vq); 397 } 398 399 static __rte_always_inline void 400 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 401 struct vhost_virtqueue *vq, 402 uint64_t *lens, 403 uint16_t *ids) 404 { 405 uint16_t i; 406 uint16_t flags; 407 uint16_t last_used_idx; 408 struct vring_packed_desc *desc_base; 409 410 last_used_idx = vq->last_used_idx; 411 desc_base = &vq->desc_packed[last_used_idx]; 412 413 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 414 415 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 416 desc_base[i].id = ids[i]; 417 desc_base[i].len = lens[i]; 418 } 419 420 rte_atomic_thread_fence(__ATOMIC_RELEASE); 421 422 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 423 desc_base[i].flags = flags; 424 } 425 426 vhost_log_cache_used_vring(dev, vq, last_used_idx * 427 sizeof(struct vring_packed_desc), 428 sizeof(struct vring_packed_desc) * 429 PACKED_BATCH_SIZE); 430 vhost_log_cache_sync(dev, vq); 431 432 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 433 } 434 435 static __rte_always_inline void 436 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 437 uint16_t id) 438 { 439 vq->shadow_used_packed[0].id = id; 440 441 if (!vq->shadow_used_idx) { 442 vq->shadow_last_used_idx = vq->last_used_idx; 443 vq->shadow_used_packed[0].flags = 444 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 445 vq->shadow_used_packed[0].len = 0; 446 vq->shadow_used_packed[0].count = 1; 447 vq->shadow_used_idx++; 448 } 449 450 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 451 } 452 453 static __rte_always_inline void 454 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 455 struct vhost_virtqueue *vq, 456 uint16_t *ids) 457 { 458 uint16_t flags; 459 uint16_t i; 460 uint16_t begin; 461 462 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 463 464 if 
(!vq->shadow_used_idx) { 465 vq->shadow_last_used_idx = vq->last_used_idx; 466 vq->shadow_used_packed[0].id = ids[0]; 467 vq->shadow_used_packed[0].len = 0; 468 vq->shadow_used_packed[0].count = 1; 469 vq->shadow_used_packed[0].flags = flags; 470 vq->shadow_used_idx++; 471 begin = 1; 472 } else 473 begin = 0; 474 475 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 476 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 477 vq->desc_packed[vq->last_used_idx + i].len = 0; 478 } 479 480 rte_atomic_thread_fence(__ATOMIC_RELEASE); 481 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 482 vq->desc_packed[vq->last_used_idx + i].flags = flags; 483 484 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 485 sizeof(struct vring_packed_desc), 486 sizeof(struct vring_packed_desc) * 487 PACKED_BATCH_SIZE); 488 vhost_log_cache_sync(dev, vq); 489 490 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 491 } 492 493 static __rte_always_inline void 494 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 495 uint16_t buf_id, 496 uint16_t count) 497 { 498 uint16_t flags; 499 500 flags = vq->desc_packed[vq->last_used_idx].flags; 501 if (vq->used_wrap_counter) { 502 flags |= VRING_DESC_F_USED; 503 flags |= VRING_DESC_F_AVAIL; 504 } else { 505 flags &= ~VRING_DESC_F_USED; 506 flags &= ~VRING_DESC_F_AVAIL; 507 } 508 509 if (!vq->shadow_used_idx) { 510 vq->shadow_last_used_idx = vq->last_used_idx; 511 512 vq->shadow_used_packed[0].id = buf_id; 513 vq->shadow_used_packed[0].len = 0; 514 vq->shadow_used_packed[0].flags = flags; 515 vq->shadow_used_idx++; 516 } else { 517 vq->desc_packed[vq->last_used_idx].id = buf_id; 518 vq->desc_packed[vq->last_used_idx].len = 0; 519 vq->desc_packed[vq->last_used_idx].flags = flags; 520 } 521 522 vq_inc_last_used_packed(vq, count); 523 } 524 525 static __rte_always_inline void 526 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 527 uint16_t buf_id, 528 uint16_t count) 529 { 530 uint16_t flags; 531 532 vq->shadow_used_packed[0].id = buf_id; 533 534 flags = vq->desc_packed[vq->last_used_idx].flags; 535 if (vq->used_wrap_counter) { 536 flags |= VRING_DESC_F_USED; 537 flags |= VRING_DESC_F_AVAIL; 538 } else { 539 flags &= ~VRING_DESC_F_USED; 540 flags &= ~VRING_DESC_F_AVAIL; 541 } 542 543 if (!vq->shadow_used_idx) { 544 vq->shadow_last_used_idx = vq->last_used_idx; 545 vq->shadow_used_packed[0].len = 0; 546 vq->shadow_used_packed[0].flags = flags; 547 vq->shadow_used_idx++; 548 } 549 550 vq_inc_last_used_packed(vq, count); 551 } 552 553 static __rte_always_inline void 554 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 555 uint32_t *len, 556 uint16_t *id, 557 uint16_t *count, 558 uint16_t num_buffers) 559 { 560 uint16_t i; 561 562 for (i = 0; i < num_buffers; i++) { 563 /* enqueue shadow flush action aligned with batch num */ 564 if (!vq->shadow_used_idx) 565 vq->shadow_aligned_idx = vq->last_used_idx & 566 PACKED_BATCH_MASK; 567 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 568 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 569 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 570 vq->shadow_aligned_idx += count[i]; 571 vq->shadow_used_idx++; 572 } 573 } 574 575 static __rte_always_inline void 576 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 577 struct vhost_virtqueue *vq, 578 uint32_t *len, 579 uint16_t *id, 580 uint16_t *count, 581 uint16_t num_buffers) 582 { 583 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 584 585 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) 
{ 586 do_data_copy_enqueue(dev, vq); 587 vhost_flush_enqueue_shadow_packed(dev, vq); 588 } 589 } 590 591 /* avoid write operation when necessary, to lessen cache issues */ 592 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 593 if ((var) != (val)) \ 594 (var) = (val); \ 595 } while (0) 596 597 static __rte_always_inline void 598 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 599 { 600 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK; 601 602 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) 603 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM; 604 605 if (csum_l4) { 606 /* 607 * Pseudo-header checksum must be set as per Virtio spec. 608 * 609 * Note: We don't propagate rte_net_intel_cksum_prepare() 610 * errors, as it would have an impact on performance, and an 611 * error would mean the packet is dropped by the guest instead 612 * of being dropped here. 613 */ 614 rte_net_intel_cksum_prepare(m_buf); 615 616 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 617 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 618 619 switch (csum_l4) { 620 case RTE_MBUF_F_TX_TCP_CKSUM: 621 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 622 cksum)); 623 break; 624 case RTE_MBUF_F_TX_UDP_CKSUM: 625 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 626 dgram_cksum)); 627 break; 628 case RTE_MBUF_F_TX_SCTP_CKSUM: 629 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 630 cksum)); 631 break; 632 } 633 } else { 634 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 635 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 636 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 637 } 638 639 /* IP cksum verification cannot be bypassed, then calculate here */ 640 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 641 struct rte_ipv4_hdr *ipv4_hdr; 642 643 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 644 m_buf->l2_len); 645 ipv4_hdr->hdr_checksum = 0; 646 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 647 } 648 649 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 650 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 651 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 652 else 653 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 654 net_hdr->gso_size = m_buf->tso_segsz; 655 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 656 + m_buf->l4_len; 657 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 658 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 659 net_hdr->gso_size = m_buf->tso_segsz; 660 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 661 m_buf->l4_len; 662 } else { 663 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 664 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 665 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 666 } 667 } 668 669 static __rte_always_inline int 670 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 671 struct buf_vector *buf_vec, uint16_t *vec_idx, 672 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 673 { 674 uint16_t vec_id = *vec_idx; 675 676 while (desc_len) { 677 uint64_t desc_addr; 678 uint64_t desc_chunck_len = desc_len; 679 680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 681 return -1; 682 683 desc_addr = vhost_iova_to_vva(dev, vq, 684 desc_iova, 685 &desc_chunck_len, 686 perm); 687 if (unlikely(!desc_addr)) 688 return -1; 689 690 rte_prefetch0((void *)(uintptr_t)desc_addr); 691 692 buf_vec[vec_id].buf_iova = desc_iova; 693 buf_vec[vec_id].buf_addr = desc_addr; 694 buf_vec[vec_id].buf_len = desc_chunck_len; 695 696 desc_len -= desc_chunck_len; 697 desc_iova += desc_chunck_len; 698 vec_id++; 699 } 700 *vec_idx = vec_id; 701 702 return 0; 703 } 704 705 static 
__rte_always_inline int 706 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 707 uint32_t avail_idx, uint16_t *vec_idx, 708 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 709 uint32_t *desc_chain_len, uint8_t perm) 710 { 711 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 712 uint16_t vec_id = *vec_idx; 713 uint32_t len = 0; 714 uint64_t dlen; 715 uint32_t nr_descs = vq->size; 716 uint32_t cnt = 0; 717 struct vring_desc *descs = vq->desc; 718 struct vring_desc *idesc = NULL; 719 720 if (unlikely(idx >= vq->size)) 721 return -1; 722 723 *desc_chain_head = idx; 724 725 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 726 dlen = vq->desc[idx].len; 727 nr_descs = dlen / sizeof(struct vring_desc); 728 if (unlikely(nr_descs > vq->size)) 729 return -1; 730 731 descs = (struct vring_desc *)(uintptr_t) 732 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 733 &dlen, 734 VHOST_ACCESS_RO); 735 if (unlikely(!descs)) 736 return -1; 737 738 if (unlikely(dlen < vq->desc[idx].len)) { 739 /* 740 * The indirect desc table is not contiguous 741 * in process VA space, we have to copy it. 742 */ 743 idesc = vhost_alloc_copy_ind_table(dev, vq, 744 vq->desc[idx].addr, vq->desc[idx].len); 745 if (unlikely(!idesc)) 746 return -1; 747 748 descs = idesc; 749 } 750 751 idx = 0; 752 } 753 754 while (1) { 755 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 756 free_ind_table(idesc); 757 return -1; 758 } 759 760 dlen = descs[idx].len; 761 len += dlen; 762 763 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 764 descs[idx].addr, dlen, 765 perm))) { 766 free_ind_table(idesc); 767 return -1; 768 } 769 770 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 771 break; 772 773 idx = descs[idx].next; 774 } 775 776 *desc_chain_len = len; 777 *vec_idx = vec_id; 778 779 if (unlikely(!!idesc)) 780 free_ind_table(idesc); 781 782 return 0; 783 } 784 785 /* 786 * Returns -1 on fail, 0 on success 787 */ 788 static inline int 789 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 790 uint64_t size, struct buf_vector *buf_vec, 791 uint16_t *num_buffers, uint16_t avail_head, 792 uint16_t *nr_vec) 793 { 794 uint16_t cur_idx; 795 uint16_t vec_idx = 0; 796 uint16_t max_tries, tries = 0; 797 798 uint16_t head_idx = 0; 799 uint32_t len = 0; 800 801 *num_buffers = 0; 802 cur_idx = vq->last_avail_idx; 803 804 if (rxvq_is_mergeable(dev)) 805 max_tries = vq->size - 1; 806 else 807 max_tries = 1; 808 809 while (size > 0) { 810 if (unlikely(cur_idx == avail_head)) 811 return -1; 812 /* 813 * if we tried all available ring items, and still 814 * can't get enough buf, it means something abnormal 815 * happened. 
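		 * With mergeable Rx buffers up to vq->size - 1 descriptor
		 * chains may be used for one packet; otherwise a single
		 * chain must be enough.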
816 */ 817 if (unlikely(++tries > max_tries)) 818 return -1; 819 820 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 821 &vec_idx, buf_vec, 822 &head_idx, &len, 823 VHOST_ACCESS_RW) < 0)) 824 return -1; 825 len = RTE_MIN(len, size); 826 update_shadow_used_ring_split(vq, head_idx, len); 827 size -= len; 828 829 cur_idx++; 830 *num_buffers += 1; 831 } 832 833 *nr_vec = vec_idx; 834 835 return 0; 836 } 837 838 static __rte_always_inline int 839 fill_vec_buf_packed_indirect(struct virtio_net *dev, 840 struct vhost_virtqueue *vq, 841 struct vring_packed_desc *desc, uint16_t *vec_idx, 842 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 843 { 844 uint16_t i; 845 uint32_t nr_descs; 846 uint16_t vec_id = *vec_idx; 847 uint64_t dlen; 848 struct vring_packed_desc *descs, *idescs = NULL; 849 850 dlen = desc->len; 851 descs = (struct vring_packed_desc *)(uintptr_t) 852 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 853 if (unlikely(!descs)) 854 return -1; 855 856 if (unlikely(dlen < desc->len)) { 857 /* 858 * The indirect desc table is not contiguous 859 * in process VA space, we have to copy it. 860 */ 861 idescs = vhost_alloc_copy_ind_table(dev, 862 vq, desc->addr, desc->len); 863 if (unlikely(!idescs)) 864 return -1; 865 866 descs = idescs; 867 } 868 869 nr_descs = desc->len / sizeof(struct vring_packed_desc); 870 if (unlikely(nr_descs >= vq->size)) { 871 free_ind_table(idescs); 872 return -1; 873 } 874 875 for (i = 0; i < nr_descs; i++) { 876 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 877 free_ind_table(idescs); 878 return -1; 879 } 880 881 dlen = descs[i].len; 882 *len += dlen; 883 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 884 descs[i].addr, dlen, 885 perm))) 886 return -1; 887 } 888 *vec_idx = vec_id; 889 890 if (unlikely(!!idescs)) 891 free_ind_table(idescs); 892 893 return 0; 894 } 895 896 static __rte_always_inline int 897 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 898 uint16_t avail_idx, uint16_t *desc_count, 899 struct buf_vector *buf_vec, uint16_t *vec_idx, 900 uint16_t *buf_id, uint32_t *len, uint8_t perm) 901 { 902 bool wrap_counter = vq->avail_wrap_counter; 903 struct vring_packed_desc *descs = vq->desc_packed; 904 uint16_t vec_id = *vec_idx; 905 uint64_t dlen; 906 907 if (avail_idx < vq->last_avail_idx) 908 wrap_counter ^= 1; 909 910 /* 911 * Perform a load-acquire barrier in desc_is_avail to 912 * enforce the ordering between desc flags and desc 913 * content. 
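	 * Reading the flags with acquire semantics ensures the id, addr
	 * and len fields read below are the values the driver published.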
914 */ 915 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 916 return -1; 917 918 *desc_count = 0; 919 *len = 0; 920 921 while (1) { 922 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 923 return -1; 924 925 if (unlikely(*desc_count >= vq->size)) 926 return -1; 927 928 *desc_count += 1; 929 *buf_id = descs[avail_idx].id; 930 931 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 932 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 933 &descs[avail_idx], 934 &vec_id, buf_vec, 935 len, perm) < 0)) 936 return -1; 937 } else { 938 dlen = descs[avail_idx].len; 939 *len += dlen; 940 941 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 942 descs[avail_idx].addr, 943 dlen, 944 perm))) 945 return -1; 946 } 947 948 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 949 break; 950 951 if (++avail_idx >= vq->size) { 952 avail_idx -= vq->size; 953 wrap_counter ^= 1; 954 } 955 } 956 957 *vec_idx = vec_id; 958 959 return 0; 960 } 961 962 static __rte_noinline void 963 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 964 struct buf_vector *buf_vec, 965 struct virtio_net_hdr_mrg_rxbuf *hdr) 966 { 967 uint64_t len; 968 uint64_t remain = dev->vhost_hlen; 969 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 970 uint64_t iova = buf_vec->buf_iova; 971 972 while (remain) { 973 len = RTE_MIN(remain, 974 buf_vec->buf_len); 975 dst = buf_vec->buf_addr; 976 rte_memcpy((void *)(uintptr_t)dst, 977 (void *)(uintptr_t)src, 978 len); 979 980 PRINT_PACKET(dev, (uintptr_t)dst, 981 (uint32_t)len, 0); 982 vhost_log_cache_write_iova(dev, vq, 983 iova, len); 984 985 remain -= len; 986 iova += len; 987 src += len; 988 buf_vec++; 989 } 990 } 991 992 static __rte_always_inline int 993 async_iter_initialize(struct virtio_net *dev, struct vhost_async *async) 994 { 995 struct vhost_iov_iter *iter; 996 997 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 998 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 999 return -1; 1000 } 1001 1002 iter = async->iov_iter + async->iter_idx; 1003 iter->iov = async->iovec + async->iovec_idx; 1004 iter->nr_segs = 0; 1005 1006 return 0; 1007 } 1008 1009 static __rte_always_inline int 1010 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1011 void *src, void *dst, size_t len) 1012 { 1013 struct vhost_iov_iter *iter; 1014 struct vhost_iovec *iovec; 1015 1016 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1017 static bool vhost_max_async_vec_log; 1018 1019 if (!vhost_max_async_vec_log) { 1020 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1021 vhost_max_async_vec_log = true; 1022 } 1023 1024 return -1; 1025 } 1026 1027 iter = async->iov_iter + async->iter_idx; 1028 iovec = async->iovec + async->iovec_idx; 1029 1030 iovec->src_addr = src; 1031 iovec->dst_addr = dst; 1032 iovec->len = len; 1033 1034 iter->nr_segs++; 1035 async->iovec_idx++; 1036 1037 return 0; 1038 } 1039 1040 static __rte_always_inline void 1041 async_iter_finalize(struct vhost_async *async) 1042 { 1043 async->iter_idx++; 1044 } 1045 1046 static __rte_always_inline void 1047 async_iter_cancel(struct vhost_async *async) 1048 { 1049 struct vhost_iov_iter *iter; 1050 1051 iter = async->iov_iter + async->iter_idx; 1052 async->iovec_idx -= iter->nr_segs; 1053 iter->nr_segs = 0; 1054 iter->iov = NULL; 1055 } 1056 1057 static __rte_always_inline void 1058 async_iter_reset(struct vhost_async *async) 1059 { 1060 async->iter_idx = 0; 1061 async->iovec_idx = 0; 1062 } 1063 1064 static __rte_always_inline int 1065 
async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1066 struct rte_mbuf *m, uint32_t mbuf_offset, 1067 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1068 { 1069 struct vhost_async *async = vq->async; 1070 uint64_t mapped_len; 1071 uint32_t buf_offset = 0; 1072 void *src, *dst; 1073 void *host_iova; 1074 1075 while (cpy_len) { 1076 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1077 buf_iova + buf_offset, cpy_len, &mapped_len); 1078 if (unlikely(!host_iova)) { 1079 VHOST_LOG_DATA(dev->ifname, ERR, 1080 "%s: failed to get host iova.\n", 1081 __func__); 1082 return -1; 1083 } 1084 1085 if (to_desc) { 1086 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1087 dst = host_iova; 1088 } else { 1089 src = host_iova; 1090 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1091 } 1092 1093 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1094 return -1; 1095 1096 cpy_len -= (uint32_t)mapped_len; 1097 mbuf_offset += (uint32_t)mapped_len; 1098 buf_offset += (uint32_t)mapped_len; 1099 } 1100 1101 return 0; 1102 } 1103 1104 static __rte_always_inline void 1105 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1106 struct rte_mbuf *m, uint32_t mbuf_offset, 1107 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1108 { 1109 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1110 1111 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1112 if (to_desc) { 1113 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1114 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1115 cpy_len); 1116 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1117 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1118 } else { 1119 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1120 (void *)((uintptr_t)(buf_addr)), 1121 cpy_len); 1122 } 1123 } else { 1124 if (to_desc) { 1125 batch_copy[vq->batch_copy_nb_elems].dst = 1126 (void *)((uintptr_t)(buf_addr)); 1127 batch_copy[vq->batch_copy_nb_elems].src = 1128 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1129 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1130 } else { 1131 batch_copy[vq->batch_copy_nb_elems].dst = 1132 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1133 batch_copy[vq->batch_copy_nb_elems].src = 1134 (void *)((uintptr_t)(buf_addr)); 1135 } 1136 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1137 vq->batch_copy_nb_elems++; 1138 } 1139 } 1140 1141 static __rte_always_inline int 1142 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1143 struct rte_mbuf *m, struct buf_vector *buf_vec, 1144 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1145 { 1146 uint32_t vec_idx = 0; 1147 uint32_t mbuf_offset, mbuf_avail; 1148 uint32_t buf_offset, buf_avail; 1149 uint64_t buf_addr, buf_iova, buf_len; 1150 uint32_t cpy_len; 1151 uint64_t hdr_addr; 1152 struct rte_mbuf *hdr_mbuf; 1153 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1154 struct vhost_async *async = vq->async; 1155 1156 if (unlikely(m == NULL)) 1157 return -1; 1158 1159 buf_addr = buf_vec[vec_idx].buf_addr; 1160 buf_iova = buf_vec[vec_idx].buf_iova; 1161 buf_len = buf_vec[vec_idx].buf_len; 1162 1163 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1164 return -1; 1165 1166 hdr_mbuf = m; 1167 hdr_addr = buf_addr; 1168 if (unlikely(buf_len < dev->vhost_hlen)) { 1169 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1170 hdr = &tmp_hdr; 1171 } else 1172 hdr = (struct virtio_net_hdr_mrg_rxbuf 
*)(uintptr_t)hdr_addr; 1173 1174 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers); 1175 1176 if (unlikely(buf_len < dev->vhost_hlen)) { 1177 buf_offset = dev->vhost_hlen - buf_len; 1178 vec_idx++; 1179 buf_addr = buf_vec[vec_idx].buf_addr; 1180 buf_iova = buf_vec[vec_idx].buf_iova; 1181 buf_len = buf_vec[vec_idx].buf_len; 1182 buf_avail = buf_len - buf_offset; 1183 } else { 1184 buf_offset = dev->vhost_hlen; 1185 buf_avail = buf_len - dev->vhost_hlen; 1186 } 1187 1188 mbuf_avail = rte_pktmbuf_data_len(m); 1189 mbuf_offset = 0; 1190 1191 if (is_async) { 1192 if (async_iter_initialize(dev, async)) 1193 return -1; 1194 } 1195 1196 while (mbuf_avail != 0 || m->next != NULL) { 1197 /* done with current buf, get the next one */ 1198 if (buf_avail == 0) { 1199 vec_idx++; 1200 if (unlikely(vec_idx >= nr_vec)) 1201 goto error; 1202 1203 buf_addr = buf_vec[vec_idx].buf_addr; 1204 buf_iova = buf_vec[vec_idx].buf_iova; 1205 buf_len = buf_vec[vec_idx].buf_len; 1206 1207 buf_offset = 0; 1208 buf_avail = buf_len; 1209 } 1210 1211 /* done with current mbuf, get the next one */ 1212 if (mbuf_avail == 0) { 1213 m = m->next; 1214 1215 mbuf_offset = 0; 1216 mbuf_avail = rte_pktmbuf_data_len(m); 1217 } 1218 1219 if (hdr_addr) { 1220 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1221 if (rxvq_is_mergeable(dev)) 1222 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1223 num_buffers); 1224 1225 if (unlikely(hdr == &tmp_hdr)) { 1226 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1227 } else { 1228 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1229 dev->vhost_hlen, 0); 1230 vhost_log_cache_write_iova(dev, vq, 1231 buf_vec[0].buf_iova, 1232 dev->vhost_hlen); 1233 } 1234 1235 hdr_addr = 0; 1236 } 1237 1238 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1239 1240 if (is_async) { 1241 if (async_fill_seg(dev, vq, m, mbuf_offset, 1242 buf_iova + buf_offset, cpy_len, true) < 0) 1243 goto error; 1244 } else { 1245 sync_fill_seg(dev, vq, m, mbuf_offset, 1246 buf_addr + buf_offset, 1247 buf_iova + buf_offset, cpy_len, true); 1248 } 1249 1250 mbuf_avail -= cpy_len; 1251 mbuf_offset += cpy_len; 1252 buf_avail -= cpy_len; 1253 buf_offset += cpy_len; 1254 } 1255 1256 if (is_async) 1257 async_iter_finalize(async); 1258 1259 return 0; 1260 error: 1261 if (is_async) 1262 async_iter_cancel(async); 1263 1264 return -1; 1265 } 1266 1267 static __rte_always_inline int 1268 vhost_enqueue_single_packed(struct virtio_net *dev, 1269 struct vhost_virtqueue *vq, 1270 struct rte_mbuf *pkt, 1271 struct buf_vector *buf_vec, 1272 uint16_t *nr_descs) 1273 { 1274 uint16_t nr_vec = 0; 1275 uint16_t avail_idx = vq->last_avail_idx; 1276 uint16_t max_tries, tries = 0; 1277 uint16_t buf_id = 0; 1278 uint32_t len = 0; 1279 uint16_t desc_count; 1280 uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1281 uint16_t num_buffers = 0; 1282 uint32_t buffer_len[vq->size]; 1283 uint16_t buffer_buf_id[vq->size]; 1284 uint16_t buffer_desc_count[vq->size]; 1285 1286 if (rxvq_is_mergeable(dev)) 1287 max_tries = vq->size - 1; 1288 else 1289 max_tries = 1; 1290 1291 while (size > 0) { 1292 /* 1293 * if we tried all available ring items, and still 1294 * can't get enough buf, it means something abnormal 1295 * happened. 
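		 * e.g. the guest posted buffers too small to hold the
		 * packet, so the enqueue of this packet is abandoned.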
1296 */ 1297 if (unlikely(++tries > max_tries)) 1298 return -1; 1299 1300 if (unlikely(fill_vec_buf_packed(dev, vq, 1301 avail_idx, &desc_count, 1302 buf_vec, &nr_vec, 1303 &buf_id, &len, 1304 VHOST_ACCESS_RW) < 0)) 1305 return -1; 1306 1307 len = RTE_MIN(len, size); 1308 size -= len; 1309 1310 buffer_len[num_buffers] = len; 1311 buffer_buf_id[num_buffers] = buf_id; 1312 buffer_desc_count[num_buffers] = desc_count; 1313 num_buffers += 1; 1314 1315 *nr_descs += desc_count; 1316 avail_idx += desc_count; 1317 if (avail_idx >= vq->size) 1318 avail_idx -= vq->size; 1319 } 1320 1321 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0) 1322 return -1; 1323 1324 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1325 buffer_desc_count, num_buffers); 1326 1327 return 0; 1328 } 1329 1330 static __rte_noinline uint32_t 1331 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1332 struct rte_mbuf **pkts, uint32_t count) 1333 { 1334 uint32_t pkt_idx = 0; 1335 uint16_t num_buffers; 1336 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1337 uint16_t avail_head; 1338 1339 /* 1340 * The ordering between avail index and 1341 * desc reads needs to be enforced. 1342 */ 1343 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1344 1345 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1346 1347 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1348 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1349 uint16_t nr_vec = 0; 1350 1351 if (unlikely(reserve_avail_buf_split(dev, vq, 1352 pkt_len, buf_vec, &num_buffers, 1353 avail_head, &nr_vec) < 0)) { 1354 VHOST_LOG_DATA(dev->ifname, DEBUG, 1355 "failed to get enough desc from vring\n"); 1356 vq->shadow_used_idx -= num_buffers; 1357 break; 1358 } 1359 1360 VHOST_LOG_DATA(dev->ifname, DEBUG, 1361 "current index %d | end index %d\n", 1362 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1363 1364 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1365 num_buffers, false) < 0) { 1366 vq->shadow_used_idx -= num_buffers; 1367 break; 1368 } 1369 1370 vq->last_avail_idx += num_buffers; 1371 } 1372 1373 do_data_copy_enqueue(dev, vq); 1374 1375 if (likely(vq->shadow_used_idx)) { 1376 flush_shadow_used_ring_split(dev, vq); 1377 vhost_vring_call_split(dev, vq); 1378 } 1379 1380 return pkt_idx; 1381 } 1382 1383 static __rte_always_inline int 1384 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1385 struct vhost_virtqueue *vq, 1386 struct rte_mbuf **pkts, 1387 uint64_t *desc_addrs, 1388 uint64_t *lens) 1389 { 1390 bool wrap_counter = vq->avail_wrap_counter; 1391 struct vring_packed_desc *descs = vq->desc_packed; 1392 uint16_t avail_idx = vq->last_avail_idx; 1393 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1394 uint16_t i; 1395 1396 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1397 return -1; 1398 1399 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1400 return -1; 1401 1402 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1403 if (unlikely(pkts[i]->next != NULL)) 1404 return -1; 1405 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1406 wrap_counter))) 1407 return -1; 1408 } 1409 1410 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1411 lens[i] = descs[avail_idx + i].len; 1412 1413 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1414 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1415 return -1; 1416 } 1417 1418 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1419 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 
1420 descs[avail_idx + i].addr, 1421 &lens[i], 1422 VHOST_ACCESS_RW); 1423 1424 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1425 if (unlikely(!desc_addrs[i])) 1426 return -1; 1427 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1428 return -1; 1429 } 1430 1431 return 0; 1432 } 1433 1434 static __rte_always_inline void 1435 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1436 struct vhost_virtqueue *vq, 1437 struct rte_mbuf **pkts, 1438 uint64_t *desc_addrs, 1439 uint64_t *lens) 1440 { 1441 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1442 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1443 struct vring_packed_desc *descs = vq->desc_packed; 1444 uint16_t avail_idx = vq->last_avail_idx; 1445 uint16_t ids[PACKED_BATCH_SIZE]; 1446 uint16_t i; 1447 1448 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1449 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1450 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1451 (uintptr_t)desc_addrs[i]; 1452 lens[i] = pkts[i]->pkt_len + 1453 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1454 } 1455 1456 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1457 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1458 1459 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1460 1461 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1462 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1463 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1464 pkts[i]->pkt_len); 1465 } 1466 1467 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1468 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1469 lens[i]); 1470 1471 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1472 ids[i] = descs[avail_idx + i].id; 1473 1474 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1475 } 1476 1477 static __rte_always_inline int 1478 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1479 struct vhost_virtqueue *vq, 1480 struct rte_mbuf **pkts) 1481 { 1482 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1483 uint64_t lens[PACKED_BATCH_SIZE]; 1484 1485 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1486 return -1; 1487 1488 if (vq->shadow_used_idx) { 1489 do_data_copy_enqueue(dev, vq); 1490 vhost_flush_enqueue_shadow_packed(dev, vq); 1491 } 1492 1493 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1494 1495 return 0; 1496 } 1497 1498 static __rte_always_inline int16_t 1499 virtio_dev_rx_single_packed(struct virtio_net *dev, 1500 struct vhost_virtqueue *vq, 1501 struct rte_mbuf *pkt) 1502 { 1503 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1504 uint16_t nr_descs = 0; 1505 1506 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1507 &nr_descs) < 0)) { 1508 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1509 return -1; 1510 } 1511 1512 VHOST_LOG_DATA(dev->ifname, DEBUG, 1513 "current index %d | end index %d\n", 1514 vq->last_avail_idx, vq->last_avail_idx + nr_descs); 1515 1516 vq_inc_last_avail_packed(vq, nr_descs); 1517 1518 return 0; 1519 } 1520 1521 static __rte_noinline uint32_t 1522 virtio_dev_rx_packed(struct virtio_net *dev, 1523 struct vhost_virtqueue *__rte_restrict vq, 1524 struct rte_mbuf **__rte_restrict pkts, 1525 uint32_t count) 1526 { 1527 uint32_t pkt_idx = 0; 1528 1529 do { 1530 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1531 1532 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1533 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1534 &pkts[pkt_idx])) { 1535 pkt_idx += PACKED_BATCH_SIZE; 1536 continue; 1537 } 1538 } 1539 1540 
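		/* Batch enqueue was not possible, fall back to the single-packet path. */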
if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1541 break; 1542 pkt_idx++; 1543 1544 } while (pkt_idx < count); 1545 1546 if (vq->shadow_used_idx) { 1547 do_data_copy_enqueue(dev, vq); 1548 vhost_flush_enqueue_shadow_packed(dev, vq); 1549 } 1550 1551 if (pkt_idx) 1552 vhost_vring_call_packed(dev, vq); 1553 1554 return pkt_idx; 1555 } 1556 1557 static __rte_always_inline uint32_t 1558 virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, 1559 struct rte_mbuf **pkts, uint32_t count) 1560 { 1561 uint32_t nb_tx = 0; 1562 1563 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 1564 rte_spinlock_lock(&vq->access_lock); 1565 1566 if (unlikely(!vq->enabled)) 1567 goto out_access_unlock; 1568 1569 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1570 vhost_user_iotlb_rd_lock(vq); 1571 1572 if (unlikely(!vq->access_ok)) 1573 if (unlikely(vring_translate(dev, vq) < 0)) 1574 goto out; 1575 1576 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1577 if (count == 0) 1578 goto out; 1579 1580 if (vq_is_packed(dev)) 1581 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1582 else 1583 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1584 1585 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1586 1587 out: 1588 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1589 vhost_user_iotlb_rd_unlock(vq); 1590 1591 out_access_unlock: 1592 rte_spinlock_unlock(&vq->access_lock); 1593 1594 return nb_tx; 1595 } 1596 1597 uint16_t 1598 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1599 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1600 { 1601 struct virtio_net *dev = get_device(vid); 1602 1603 if (!dev) 1604 return 0; 1605 1606 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1607 VHOST_LOG_DATA(dev->ifname, ERR, 1608 "%s: built-in vhost net backend is disabled.\n", 1609 __func__); 1610 return 0; 1611 } 1612 1613 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1614 VHOST_LOG_DATA(dev->ifname, ERR, 1615 "%s: invalid virtqueue idx %d.\n", 1616 __func__, queue_id); 1617 return 0; 1618 } 1619 1620 return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); 1621 } 1622 1623 static __rte_always_inline uint16_t 1624 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1625 { 1626 struct vhost_async *async = vq->async; 1627 1628 if (async->pkts_idx >= async->pkts_inflight_n) 1629 return async->pkts_idx - async->pkts_inflight_n; 1630 else 1631 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1632 } 1633 1634 static __rte_always_inline void 1635 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1636 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1637 { 1638 size_t elem_size = sizeof(struct vring_used_elem); 1639 1640 if (d_idx + count <= ring_size) { 1641 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1642 } else { 1643 uint16_t size = ring_size - d_idx; 1644 1645 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1646 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1647 } 1648 } 1649 1650 static __rte_always_inline void 1651 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1652 struct vring_used_elem_packed *d_ring, 1653 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1654 { 1655 size_t elem_size = sizeof(struct vring_used_elem_packed); 1656 1657 if (d_idx + count <= ring_size) { 1658 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1659 } else { 1660 
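		/* Destination wraps: copy the tail of d_ring first, then its head. */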
uint16_t size = ring_size - d_idx; 1661 1662 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1663 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1664 } 1665 } 1666 1667 static __rte_noinline uint32_t 1668 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1669 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1670 { 1671 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1672 uint32_t pkt_idx = 0; 1673 uint16_t num_buffers; 1674 uint16_t avail_head; 1675 1676 struct vhost_async *async = vq->async; 1677 struct async_inflight_info *pkts_info = async->pkts_info; 1678 uint32_t pkt_err = 0; 1679 uint16_t n_xfer; 1680 uint16_t slot_idx = 0; 1681 1682 /* 1683 * The ordering between avail index and desc reads need to be enforced. 1684 */ 1685 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1686 1687 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1688 1689 async_iter_reset(async); 1690 1691 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1692 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1693 uint16_t nr_vec = 0; 1694 1695 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1696 &num_buffers, avail_head, &nr_vec) < 0)) { 1697 VHOST_LOG_DATA(dev->ifname, DEBUG, 1698 "failed to get enough desc from vring\n"); 1699 vq->shadow_used_idx -= num_buffers; 1700 break; 1701 } 1702 1703 VHOST_LOG_DATA(dev->ifname, DEBUG, 1704 "current index %d | end index %d\n", 1705 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1706 1707 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1708 vq->shadow_used_idx -= num_buffers; 1709 break; 1710 } 1711 1712 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1713 pkts_info[slot_idx].descs = num_buffers; 1714 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1715 1716 vq->last_avail_idx += num_buffers; 1717 } 1718 1719 if (unlikely(pkt_idx == 0)) 1720 return 0; 1721 1722 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1723 async->iov_iter, pkt_idx); 1724 1725 pkt_err = pkt_idx - n_xfer; 1726 if (unlikely(pkt_err)) { 1727 uint16_t num_descs = 0; 1728 1729 VHOST_LOG_DATA(dev->ifname, DEBUG, 1730 "%s: failed to transfer %u packets for queue %u.\n", 1731 __func__, pkt_err, vq->index); 1732 1733 /* update number of completed packets */ 1734 pkt_idx = n_xfer; 1735 1736 /* calculate the sum of descriptors to revert */ 1737 while (pkt_err-- > 0) { 1738 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1739 slot_idx--; 1740 } 1741 1742 /* recover shadow used ring and available ring */ 1743 vq->shadow_used_idx -= num_descs; 1744 vq->last_avail_idx -= num_descs; 1745 } 1746 1747 /* keep used descriptors */ 1748 if (likely(vq->shadow_used_idx)) { 1749 uint16_t to = async->desc_idx_split & (vq->size - 1); 1750 1751 store_dma_desc_info_split(vq->shadow_used_split, 1752 async->descs_split, vq->size, 0, to, 1753 vq->shadow_used_idx); 1754 1755 async->desc_idx_split += vq->shadow_used_idx; 1756 1757 async->pkts_idx += pkt_idx; 1758 if (async->pkts_idx >= vq->size) 1759 async->pkts_idx -= vq->size; 1760 1761 async->pkts_inflight_n += pkt_idx; 1762 vq->shadow_used_idx = 0; 1763 } 1764 1765 return pkt_idx; 1766 } 1767 1768 1769 static __rte_always_inline int 1770 vhost_enqueue_async_packed(struct virtio_net *dev, 1771 struct vhost_virtqueue *vq, 1772 struct rte_mbuf *pkt, 1773 struct buf_vector *buf_vec, 1774 uint16_t *nr_descs, 1775 uint16_t *nr_buffers) 1776 { 1777 
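	/*
	 * Async counterpart of vhost_enqueue_single_packed(): descriptors
	 * are reserved the same way, but the payload is described as IOVA
	 * segments for the DMA engine instead of being copied by the CPU.
	 */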
uint16_t nr_vec = 0; 1778 uint16_t avail_idx = vq->last_avail_idx; 1779 uint16_t max_tries, tries = 0; 1780 uint16_t buf_id = 0; 1781 uint32_t len = 0; 1782 uint16_t desc_count = 0; 1783 uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1784 uint32_t buffer_len[vq->size]; 1785 uint16_t buffer_buf_id[vq->size]; 1786 uint16_t buffer_desc_count[vq->size]; 1787 1788 if (rxvq_is_mergeable(dev)) 1789 max_tries = vq->size - 1; 1790 else 1791 max_tries = 1; 1792 1793 while (size > 0) { 1794 /* 1795 * if we tried all available ring items, and still 1796 * can't get enough buf, it means something abnormal 1797 * happened. 1798 */ 1799 if (unlikely(++tries > max_tries)) 1800 return -1; 1801 1802 if (unlikely(fill_vec_buf_packed(dev, vq, 1803 avail_idx, &desc_count, 1804 buf_vec, &nr_vec, 1805 &buf_id, &len, 1806 VHOST_ACCESS_RW) < 0)) 1807 return -1; 1808 1809 len = RTE_MIN(len, size); 1810 size -= len; 1811 1812 buffer_len[*nr_buffers] = len; 1813 buffer_buf_id[*nr_buffers] = buf_id; 1814 buffer_desc_count[*nr_buffers] = desc_count; 1815 *nr_buffers += 1; 1816 *nr_descs += desc_count; 1817 avail_idx += desc_count; 1818 if (avail_idx >= vq->size) 1819 avail_idx -= vq->size; 1820 } 1821 1822 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1823 return -1; 1824 1825 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1826 1827 return 0; 1828 } 1829 1830 static __rte_always_inline int16_t 1831 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1832 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1833 { 1834 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1835 1836 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1837 nr_descs, nr_buffers) < 0)) { 1838 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1839 return -1; 1840 } 1841 1842 VHOST_LOG_DATA(dev->ifname, DEBUG, 1843 "current index %d | end index %d\n", 1844 vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1845 1846 return 0; 1847 } 1848 1849 static __rte_always_inline void 1850 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 1851 uint32_t nr_err, uint32_t *pkt_idx) 1852 { 1853 uint16_t descs_err = 0; 1854 uint16_t buffers_err = 0; 1855 struct async_inflight_info *pkts_info = vq->async->pkts_info; 1856 1857 *pkt_idx -= nr_err; 1858 /* calculate the sum of buffers and descs of DMA-error packets. 
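	 * Slots are walked backwards from the last one that was filled.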
*/ 1859 while (nr_err-- > 0) { 1860 descs_err += pkts_info[slot_idx % vq->size].descs; 1861 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1862 slot_idx--; 1863 } 1864 1865 if (vq->last_avail_idx >= descs_err) { 1866 vq->last_avail_idx -= descs_err; 1867 } else { 1868 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1869 vq->avail_wrap_counter ^= 1; 1870 } 1871 1872 vq->shadow_used_idx -= buffers_err; 1873 } 1874 1875 static __rte_noinline uint32_t 1876 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1877 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1878 { 1879 uint32_t pkt_idx = 0; 1880 uint16_t n_xfer; 1881 uint16_t num_buffers; 1882 uint16_t num_descs; 1883 1884 struct vhost_async *async = vq->async; 1885 struct async_inflight_info *pkts_info = async->pkts_info; 1886 uint32_t pkt_err = 0; 1887 uint16_t slot_idx = 0; 1888 1889 do { 1890 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1891 1892 num_buffers = 0; 1893 num_descs = 0; 1894 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 1895 &num_descs, &num_buffers) < 0)) 1896 break; 1897 1898 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 1899 1900 pkts_info[slot_idx].descs = num_descs; 1901 pkts_info[slot_idx].nr_buffers = num_buffers; 1902 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1903 1904 pkt_idx++; 1905 vq_inc_last_avail_packed(vq, num_descs); 1906 } while (pkt_idx < count); 1907 1908 if (unlikely(pkt_idx == 0)) 1909 return 0; 1910 1911 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1912 async->iov_iter, pkt_idx); 1913 1914 async_iter_reset(async); 1915 1916 pkt_err = pkt_idx - n_xfer; 1917 if (unlikely(pkt_err)) { 1918 VHOST_LOG_DATA(dev->ifname, DEBUG, 1919 "%s: failed to transfer %u packets for queue %u.\n", 1920 __func__, pkt_err, vq->index); 1921 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 1922 } 1923 1924 if (likely(vq->shadow_used_idx)) { 1925 /* keep used descriptors. */ 1926 store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed, 1927 vq->size, 0, async->buffer_idx_packed, 1928 vq->shadow_used_idx); 1929 1930 async->buffer_idx_packed += vq->shadow_used_idx; 1931 if (async->buffer_idx_packed >= vq->size) 1932 async->buffer_idx_packed -= vq->size; 1933 1934 async->pkts_idx += pkt_idx; 1935 if (async->pkts_idx >= vq->size) 1936 async->pkts_idx -= vq->size; 1937 1938 vq->shadow_used_idx = 0; 1939 async->pkts_inflight_n += pkt_idx; 1940 } 1941 1942 return pkt_idx; 1943 } 1944 1945 static __rte_always_inline void 1946 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 1947 { 1948 struct vhost_async *async = vq->async; 1949 uint16_t nr_left = n_descs; 1950 uint16_t nr_copy; 1951 uint16_t to, from; 1952 1953 do { 1954 from = async->last_desc_idx_split & (vq->size - 1); 1955 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 1956 to = vq->last_used_idx & (vq->size - 1); 1957 1958 if (to + nr_copy <= vq->size) { 1959 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1960 nr_copy * sizeof(struct vring_used_elem)); 1961 } else { 1962 uint16_t size = vq->size - to; 1963 1964 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 1965 size * sizeof(struct vring_used_elem)); 1966 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 1967 (nr_copy - size) * sizeof(struct vring_used_elem)); 1968 } 1969 1970 async->last_desc_idx_split += nr_copy; 1971 vq->last_used_idx += nr_copy; 1972 nr_left -= nr_copy; 1973 } while (nr_left > 0); 1974 } 1975 1976 static __rte_always_inline void 1977 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 1978 uint16_t n_buffers) 1979 { 1980 struct vhost_async *async = vq->async; 1981 uint16_t from = async->last_buffer_idx_packed; 1982 uint16_t used_idx = vq->last_used_idx; 1983 uint16_t head_idx = vq->last_used_idx; 1984 uint16_t head_flags = 0; 1985 uint16_t i; 1986 1987 /* Split loop in two to save memory barriers */ 1988 for (i = 0; i < n_buffers; i++) { 1989 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 1990 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 1991 1992 used_idx += async->buffers_packed[from].count; 1993 if (used_idx >= vq->size) 1994 used_idx -= vq->size; 1995 1996 from++; 1997 if (from >= vq->size) 1998 from = 0; 1999 } 2000 2001 /* The ordering for storing desc flags needs to be enforced. */ 2002 rte_atomic_thread_fence(__ATOMIC_RELEASE); 2003 2004 from = async->last_buffer_idx_packed; 2005 2006 for (i = 0; i < n_buffers; i++) { 2007 uint16_t flags; 2008 2009 if (async->buffers_packed[from].len) 2010 flags = VRING_DESC_F_WRITE; 2011 else 2012 flags = 0; 2013 2014 if (vq->used_wrap_counter) { 2015 flags |= VRING_DESC_F_USED; 2016 flags |= VRING_DESC_F_AVAIL; 2017 } else { 2018 flags &= ~VRING_DESC_F_USED; 2019 flags &= ~VRING_DESC_F_AVAIL; 2020 } 2021 2022 if (i > 0) { 2023 vq->desc_packed[vq->last_used_idx].flags = flags; 2024 } else { 2025 head_idx = vq->last_used_idx; 2026 head_flags = flags; 2027 } 2028 2029 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2030 2031 from++; 2032 if (from == vq->size) 2033 from = 0; 2034 } 2035 2036 vq->desc_packed[head_idx].flags = head_flags; 2037 async->last_buffer_idx_packed = from; 2038 } 2039 2040 static __rte_always_inline uint16_t 2041 vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2042 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) 2043 { 2044 struct vhost_async *async = vq->async; 2045 struct async_inflight_info *pkts_info = async->pkts_info; 2046 uint16_t nr_cpl_pkts = 0; 2047 uint16_t n_descs = 0, n_buffers = 0; 2048 uint16_t start_idx, from, i; 2049 2050 /* Check completed copies for the given DMA vChannel */ 2051 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2052 2053 start_idx = async_get_first_inflight_pkt_idx(vq); 2054 /** 2055 * Calculate the number of copy completed packets. 2056 * Note that there may be completed packets even if 2057 * no copies are reported done by the given DMA vChannel, 2058 * as it's possible that a virtqueue uses multiple DMA 2059 * vChannels. 
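	 * Completion is therefore tracked per packet via pkts_cmpl_flag,
	 * which is set once the last DMA copy of that packet completes.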
2060 */ 2061 from = start_idx; 2062 while (vq->async->pkts_cmpl_flag[from] && count--) { 2063 vq->async->pkts_cmpl_flag[from] = false; 2064 from++; 2065 if (from >= vq->size) 2066 from -= vq->size; 2067 nr_cpl_pkts++; 2068 } 2069 2070 if (nr_cpl_pkts == 0) 2071 return 0; 2072 2073 for (i = 0; i < nr_cpl_pkts; i++) { 2074 from = (start_idx + i) % vq->size; 2075 /* Only used with packed ring */ 2076 n_buffers += pkts_info[from].nr_buffers; 2077 /* Only used with split ring */ 2078 n_descs += pkts_info[from].descs; 2079 pkts[i] = pkts_info[from].mbuf; 2080 } 2081 2082 async->pkts_inflight_n -= nr_cpl_pkts; 2083 2084 if (likely(vq->enabled && vq->access_ok)) { 2085 if (vq_is_packed(dev)) { 2086 write_back_completed_descs_packed(vq, n_buffers); 2087 vhost_vring_call_packed(dev, vq); 2088 } else { 2089 write_back_completed_descs_split(vq, n_descs); 2090 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE); 2091 vhost_vring_call_split(dev, vq); 2092 } 2093 } else { 2094 if (vq_is_packed(dev)) { 2095 async->last_buffer_idx_packed += n_buffers; 2096 if (async->last_buffer_idx_packed >= vq->size) 2097 async->last_buffer_idx_packed -= vq->size; 2098 } else { 2099 async->last_desc_idx_split += n_descs; 2100 } 2101 } 2102 2103 return nr_cpl_pkts; 2104 } 2105 2106 uint16_t 2107 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2108 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2109 uint16_t vchan_id) 2110 { 2111 struct virtio_net *dev = get_device(vid); 2112 struct vhost_virtqueue *vq; 2113 uint16_t n_pkts_cpl = 0; 2114 2115 if (unlikely(!dev)) 2116 return 0; 2117 2118 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2119 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2120 VHOST_LOG_DATA(dev->ifname, ERR, 2121 "%s: invalid virtqueue idx %d.\n", 2122 __func__, queue_id); 2123 return 0; 2124 } 2125 2126 if (unlikely(!dma_copy_track[dma_id].vchans || 2127 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2128 VHOST_LOG_DATA(dev->ifname, ERR, 2129 "%s: invalid channel %d:%u.\n", 2130 __func__, dma_id, vchan_id); 2131 return 0; 2132 } 2133 2134 vq = dev->virtqueue[queue_id]; 2135 2136 if (!rte_spinlock_trylock(&vq->access_lock)) { 2137 VHOST_LOG_DATA(dev->ifname, DEBUG, 2138 "%s: virtqueue %u is busy.\n", 2139 __func__, queue_id); 2140 return 0; 2141 } 2142 2143 if (unlikely(!vq->async)) { 2144 VHOST_LOG_DATA(dev->ifname, ERR, 2145 "%s: async not registered for virtqueue %d.\n", 2146 __func__, queue_id); 2147 goto out; 2148 } 2149 2150 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); 2151 2152 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2153 vq->stats.inflight_completed += n_pkts_cpl; 2154 2155 out: 2156 rte_spinlock_unlock(&vq->access_lock); 2157 2158 return n_pkts_cpl; 2159 } 2160 2161 uint16_t 2162 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2163 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2164 uint16_t vchan_id) 2165 { 2166 struct virtio_net *dev = get_device(vid); 2167 struct vhost_virtqueue *vq; 2168 uint16_t n_pkts_cpl = 0; 2169 2170 if (!dev) 2171 return 0; 2172 2173 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2174 if (unlikely(queue_id >= dev->nr_vring)) { 2175 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 2176 __func__, queue_id); 2177 return 0; 2178 } 2179 2180 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2181 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2182 __func__, dma_id); 2183 
return 0; 2184 } 2185 2186 vq = dev->virtqueue[queue_id]; 2187 2188 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { 2189 VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access lock taken.\n", 2190 __func__); 2191 return -1; 2192 } 2193 2194 if (unlikely(!vq->async)) { 2195 VHOST_LOG_DATA(dev->ifname, ERR, 2196 "%s: async not registered for virtqueue %d.\n", 2197 __func__, queue_id); 2198 return 0; 2199 } 2200 2201 if (unlikely(!dma_copy_track[dma_id].vchans || 2202 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2203 VHOST_LOG_DATA(dev->ifname, ERR, 2204 "%s: invalid channel %d:%u.\n", 2205 __func__, dma_id, vchan_id); 2206 return 0; 2207 } 2208 2209 if ((queue_id & 1) == 0) 2210 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2211 dma_id, vchan_id); 2212 else 2213 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2214 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2215 2216 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2217 vq->stats.inflight_completed += n_pkts_cpl; 2218 2219 return n_pkts_cpl; 2220 } 2221 2222 uint16_t 2223 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2224 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2225 { 2226 struct virtio_net *dev = get_device(vid); 2227 struct vhost_virtqueue *vq; 2228 uint16_t n_pkts_cpl = 0; 2229 2230 if (!dev) 2231 return 0; 2232 2233 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2234 if (unlikely(queue_id >= dev->nr_vring)) { 2235 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n", 2236 __func__, queue_id); 2237 return 0; 2238 } 2239 2240 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2241 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2242 __func__, dma_id); 2243 return 0; 2244 } 2245 2246 vq = dev->virtqueue[queue_id]; 2247 2248 if (!rte_spinlock_trylock(&vq->access_lock)) { 2249 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", 2250 __func__, queue_id); 2251 return 0; 2252 } 2253 2254 if (unlikely(!vq->async)) { 2255 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n", 2256 __func__, queue_id); 2257 goto out_access_unlock; 2258 } 2259 2260 if (unlikely(!dma_copy_track[dma_id].vchans || 2261 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2262 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 2263 __func__, dma_id, vchan_id); 2264 goto out_access_unlock; 2265 } 2266 2267 if ((queue_id & 1) == 0) 2268 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2269 dma_id, vchan_id); 2270 else 2271 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2272 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2273 2274 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2275 vq->stats.inflight_completed += n_pkts_cpl; 2276 2277 out_access_unlock: 2278 rte_spinlock_unlock(&vq->access_lock); 2279 2280 return n_pkts_cpl; 2281 } 2282 2283 static __rte_always_inline uint32_t 2284 virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, 2285 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2286 { 2287 uint32_t nb_tx = 0; 2288 2289 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2290 2291 if (unlikely(!dma_copy_track[dma_id].vchans || 2292 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2293 VHOST_LOG_DATA(dev->ifname, ERR, 2294 "%s: invalid channel %d:%u.\n", 2295 __func__, dma_id, vchan_id); 2296 return 0; 2297 } 
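
	/*
	 * Locking order for the submission path below: take the virtqueue
	 * access_lock first, then (only when VIRTIO_F_IOMMU_PLATFORM is
	 * negotiated) the IOTLB read lock, and re-run vring_translate()
	 * under both locks if access_ok was cleared in the meantime.
	 */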
2298 2299 rte_spinlock_lock(&vq->access_lock); 2300 2301 if (unlikely(!vq->enabled || !vq->async)) 2302 goto out_access_unlock; 2303 2304 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2305 vhost_user_iotlb_rd_lock(vq); 2306 2307 if (unlikely(!vq->access_ok)) 2308 if (unlikely(vring_translate(dev, vq) < 0)) 2309 goto out; 2310 2311 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2312 if (count == 0) 2313 goto out; 2314 2315 if (vq_is_packed(dev)) 2316 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, 2317 dma_id, vchan_id); 2318 else 2319 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, 2320 dma_id, vchan_id); 2321 2322 vq->stats.inflight_submitted += nb_tx; 2323 2324 out: 2325 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2326 vhost_user_iotlb_rd_unlock(vq); 2327 2328 out_access_unlock: 2329 rte_spinlock_unlock(&vq->access_lock); 2330 2331 return nb_tx; 2332 } 2333 2334 uint16_t 2335 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2336 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2337 uint16_t vchan_id) 2338 { 2339 struct virtio_net *dev = get_device(vid); 2340 2341 if (!dev) 2342 return 0; 2343 2344 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2345 VHOST_LOG_DATA(dev->ifname, ERR, 2346 "%s: built-in vhost net backend is disabled.\n", 2347 __func__); 2348 return 0; 2349 } 2350 2351 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2352 VHOST_LOG_DATA(dev->ifname, ERR, 2353 "%s: invalid virtqueue idx %d.\n", 2354 __func__, queue_id); 2355 return 0; 2356 } 2357 2358 return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, 2359 dma_id, vchan_id); 2360 } 2361 2362 static inline bool 2363 virtio_net_with_host_offload(struct virtio_net *dev) 2364 { 2365 if (dev->features & 2366 ((1ULL << VIRTIO_NET_F_CSUM) | 2367 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2368 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2369 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2370 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2371 return true; 2372 2373 return false; 2374 } 2375 2376 static int 2377 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2378 { 2379 struct rte_ipv4_hdr *ipv4_hdr; 2380 struct rte_ipv6_hdr *ipv6_hdr; 2381 struct rte_ether_hdr *eth_hdr; 2382 uint16_t ethertype; 2383 uint16_t data_len = rte_pktmbuf_data_len(m); 2384 2385 if (data_len < sizeof(struct rte_ether_hdr)) 2386 return -EINVAL; 2387 2388 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2389 2390 m->l2_len = sizeof(struct rte_ether_hdr); 2391 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2392 2393 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2394 if (data_len < sizeof(struct rte_ether_hdr) + 2395 sizeof(struct rte_vlan_hdr)) 2396 goto error; 2397 2398 struct rte_vlan_hdr *vlan_hdr = 2399 (struct rte_vlan_hdr *)(eth_hdr + 1); 2400 2401 m->l2_len += sizeof(struct rte_vlan_hdr); 2402 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2403 } 2404 2405 switch (ethertype) { 2406 case RTE_ETHER_TYPE_IPV4: 2407 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2408 goto error; 2409 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2410 m->l2_len); 2411 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2412 if (data_len < m->l2_len + m->l3_len) 2413 goto error; 2414 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2415 *l4_proto = ipv4_hdr->next_proto_id; 2416 break; 2417 case RTE_ETHER_TYPE_IPV6: 2418 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2419 goto error; 2420 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2421 m->l2_len); 
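		/*
		 * Only the fixed 40-byte IPv6 header is accounted for here;
		 * extension headers are not parsed, so packets carrying them
		 * end up in the error path via the L4 protocol check below.
		 */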
2422 m->l3_len = sizeof(struct rte_ipv6_hdr); 2423 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2424 *l4_proto = ipv6_hdr->proto; 2425 break; 2426 default: 2427 /* a valid L3 header is needed for further L4 parsing */ 2428 goto error; 2429 } 2430 2431 /* both CSUM and GSO need a valid L4 header */ 2432 switch (*l4_proto) { 2433 case IPPROTO_TCP: 2434 if (data_len < m->l2_len + m->l3_len + 2435 sizeof(struct rte_tcp_hdr)) 2436 goto error; 2437 break; 2438 case IPPROTO_UDP: 2439 if (data_len < m->l2_len + m->l3_len + 2440 sizeof(struct rte_udp_hdr)) 2441 goto error; 2442 break; 2443 case IPPROTO_SCTP: 2444 if (data_len < m->l2_len + m->l3_len + 2445 sizeof(struct rte_sctp_hdr)) 2446 goto error; 2447 break; 2448 default: 2449 goto error; 2450 } 2451 2452 return 0; 2453 2454 error: 2455 m->l2_len = 0; 2456 m->l3_len = 0; 2457 m->ol_flags = 0; 2458 return -EINVAL; 2459 } 2460 2461 static __rte_always_inline void 2462 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2463 struct rte_mbuf *m) 2464 { 2465 uint8_t l4_proto = 0; 2466 struct rte_tcp_hdr *tcp_hdr = NULL; 2467 uint16_t tcp_len; 2468 uint16_t data_len = rte_pktmbuf_data_len(m); 2469 2470 if (parse_headers(m, &l4_proto) < 0) 2471 return; 2472 2473 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2474 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2475 switch (hdr->csum_offset) { 2476 case (offsetof(struct rte_tcp_hdr, cksum)): 2477 if (l4_proto != IPPROTO_TCP) 2478 goto error; 2479 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2480 break; 2481 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2482 if (l4_proto != IPPROTO_UDP) 2483 goto error; 2484 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2485 break; 2486 case (offsetof(struct rte_sctp_hdr, cksum)): 2487 if (l4_proto != IPPROTO_SCTP) 2488 goto error; 2489 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2490 break; 2491 default: 2492 goto error; 2493 } 2494 } else { 2495 goto error; 2496 } 2497 } 2498 2499 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2500 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2501 case VIRTIO_NET_HDR_GSO_TCPV4: 2502 case VIRTIO_NET_HDR_GSO_TCPV6: 2503 if (l4_proto != IPPROTO_TCP) 2504 goto error; 2505 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2506 struct rte_tcp_hdr *, 2507 m->l2_len + m->l3_len); 2508 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2509 if (data_len < m->l2_len + m->l3_len + tcp_len) 2510 goto error; 2511 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2512 m->tso_segsz = hdr->gso_size; 2513 m->l4_len = tcp_len; 2514 break; 2515 case VIRTIO_NET_HDR_GSO_UDP: 2516 if (l4_proto != IPPROTO_UDP) 2517 goto error; 2518 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2519 m->tso_segsz = hdr->gso_size; 2520 m->l4_len = sizeof(struct rte_udp_hdr); 2521 break; 2522 default: 2523 VHOST_LOG_DATA(dev->ifname, WARNING, 2524 "unsupported gso type %u.\n", 2525 hdr->gso_type); 2526 goto error; 2527 } 2528 } 2529 return; 2530 2531 error: 2532 m->l2_len = 0; 2533 m->l3_len = 0; 2534 m->ol_flags = 0; 2535 } 2536 2537 static __rte_always_inline void 2538 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2539 struct rte_mbuf *m, bool legacy_ol_flags) 2540 { 2541 struct rte_net_hdr_lens hdr_lens; 2542 int l4_supported = 0; 2543 uint32_t ptype; 2544 2545 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2546 return; 2547 2548 if (legacy_ol_flags) { 2549 vhost_dequeue_offload_legacy(dev, hdr, m); 2550 return; 2551 } 2552 2553 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2554 2555 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2556 
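	/*
	 * The parsed packet type is reported to the application and also
	 * tells us whether the L4 protocol is one for which a pending
	 * checksum request can be expressed as RTE_MBUF_F_RX_L4_CKSUM_NONE.
	 */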
m->packet_type = ptype; 2557 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2558 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2559 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2560 l4_supported = 1; 2561 2562 /* According to Virtio 1.1 spec, the device only needs to look at 2563 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2564 * This differs from the processing incoming packets path where the 2565 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2566 * device. 2567 * 2568 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2569 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2570 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2571 * 2572 * 5.1.6.2.2 Device Requirements: Packet Transmission 2573 * The device MUST ignore flag bits that it does not recognize. 2574 */ 2575 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2576 uint32_t hdrlen; 2577 2578 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2579 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2580 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2581 } else { 2582 /* Unknown proto or tunnel, do sw cksum. We can assume 2583 * the cksum field is in the first segment since the 2584 * buffers we provided to the host are large enough. 2585 * In case of SCTP, this will be wrong since it's a CRC 2586 * but there's nothing we can do. 2587 */ 2588 uint16_t csum = 0, off; 2589 2590 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2591 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2592 return; 2593 if (likely(csum != 0xffff)) 2594 csum = ~csum; 2595 off = hdr->csum_offset + hdr->csum_start; 2596 if (rte_pktmbuf_data_len(m) >= off + 1) 2597 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2598 } 2599 } 2600 2601 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2602 if (hdr->gso_size == 0) 2603 return; 2604 2605 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2606 case VIRTIO_NET_HDR_GSO_TCPV4: 2607 case VIRTIO_NET_HDR_GSO_TCPV6: 2608 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2609 break; 2610 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2611 m->tso_segsz = hdr->gso_size; 2612 break; 2613 case VIRTIO_NET_HDR_GSO_UDP: 2614 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2615 break; 2616 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2617 m->tso_segsz = hdr->gso_size; 2618 break; 2619 default: 2620 break; 2621 } 2622 } 2623 } 2624 2625 static __rte_noinline void 2626 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2627 struct buf_vector *buf_vec) 2628 { 2629 uint64_t len; 2630 uint64_t remain = sizeof(struct virtio_net_hdr); 2631 uint64_t src; 2632 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2633 2634 while (remain) { 2635 len = RTE_MIN(remain, buf_vec->buf_len); 2636 src = buf_vec->buf_addr; 2637 rte_memcpy((void *)(uintptr_t)dst, 2638 (void *)(uintptr_t)src, len); 2639 2640 remain -= len; 2641 dst += len; 2642 buf_vec++; 2643 } 2644 } 2645 2646 static __rte_always_inline int 2647 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2648 struct buf_vector *buf_vec, uint16_t nr_vec, 2649 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2650 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2651 { 2652 uint32_t buf_avail, buf_offset, buf_len; 2653 uint64_t buf_addr, buf_iova; 2654 uint32_t mbuf_avail, mbuf_offset; 2655 uint32_t hdr_remain = dev->vhost_hlen; 2656 uint32_t cpy_len; 2657 struct rte_mbuf *cur = m, *prev = m; 2658 struct virtio_net_hdr tmp_hdr; 2659 struct virtio_net_hdr *hdr = 
NULL; 2660 uint16_t vec_idx; 2661 struct vhost_async *async = vq->async; 2662 struct async_inflight_info *pkts_info; 2663 2664 /* 2665 * The caller has checked the descriptors chain is larger than the 2666 * header size. 2667 */ 2668 2669 if (virtio_net_with_host_offload(dev)) { 2670 if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { 2671 /* 2672 * No luck, the virtio-net header doesn't fit 2673 * in a contiguous virtual area. 2674 */ 2675 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2676 hdr = &tmp_hdr; 2677 } else { 2678 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); 2679 } 2680 } 2681 2682 for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { 2683 if (buf_vec[vec_idx].buf_len > hdr_remain) 2684 break; 2685 2686 hdr_remain -= buf_vec[vec_idx].buf_len; 2687 } 2688 2689 buf_addr = buf_vec[vec_idx].buf_addr; 2690 buf_iova = buf_vec[vec_idx].buf_iova; 2691 buf_len = buf_vec[vec_idx].buf_len; 2692 buf_offset = hdr_remain; 2693 buf_avail = buf_vec[vec_idx].buf_len - hdr_remain; 2694 2695 PRINT_PACKET(dev, 2696 (uintptr_t)(buf_addr + buf_offset), 2697 (uint32_t)buf_avail, 0); 2698 2699 mbuf_offset = 0; 2700 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2701 2702 if (is_async) { 2703 pkts_info = async->pkts_info; 2704 if (async_iter_initialize(dev, async)) 2705 return -1; 2706 } 2707 2708 while (1) { 2709 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2710 2711 if (is_async) { 2712 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2713 buf_iova + buf_offset, cpy_len, false) < 0) 2714 goto error; 2715 } else if (likely(hdr && cur == m)) { 2716 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), 2717 (void *)((uintptr_t)(buf_addr + buf_offset)), 2718 cpy_len); 2719 } else { 2720 sync_fill_seg(dev, vq, cur, mbuf_offset, 2721 buf_addr + buf_offset, 2722 buf_iova + buf_offset, cpy_len, false); 2723 } 2724 2725 mbuf_avail -= cpy_len; 2726 mbuf_offset += cpy_len; 2727 buf_avail -= cpy_len; 2728 buf_offset += cpy_len; 2729 2730 /* This buf reaches to its end, get the next one */ 2731 if (buf_avail == 0) { 2732 if (++vec_idx >= nr_vec) 2733 break; 2734 2735 buf_addr = buf_vec[vec_idx].buf_addr; 2736 buf_iova = buf_vec[vec_idx].buf_iova; 2737 buf_len = buf_vec[vec_idx].buf_len; 2738 2739 buf_offset = 0; 2740 buf_avail = buf_len; 2741 2742 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2743 (uint32_t)buf_avail, 0); 2744 } 2745 2746 /* 2747 * This mbuf reaches to its end, get a new one 2748 * to hold more data. 
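		 * The new mbuf is chained to the previous segment, whose filled
		 * length is added to the packet total before copying continues.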
2749 */ 2750 if (mbuf_avail == 0) { 2751 cur = rte_pktmbuf_alloc(mbuf_pool); 2752 if (unlikely(cur == NULL)) { 2753 VHOST_LOG_DATA(dev->ifname, ERR, 2754 "failed to allocate memory for mbuf.\n"); 2755 goto error; 2756 } 2757 2758 prev->next = cur; 2759 prev->data_len = mbuf_offset; 2760 m->nb_segs += 1; 2761 m->pkt_len += mbuf_offset; 2762 prev = cur; 2763 2764 mbuf_offset = 0; 2765 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2766 } 2767 } 2768 2769 prev->data_len = mbuf_offset; 2770 m->pkt_len += mbuf_offset; 2771 2772 if (is_async) { 2773 async_iter_finalize(async); 2774 if (hdr) 2775 pkts_info[slot_idx].nethdr = *hdr; 2776 } else if (hdr) { 2777 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 2778 } 2779 2780 return 0; 2781 error: 2782 if (is_async) 2783 async_iter_cancel(async); 2784 2785 return -1; 2786 } 2787 2788 static void 2789 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2790 { 2791 rte_free(opaque); 2792 } 2793 2794 static int 2795 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 2796 { 2797 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2798 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2799 uint16_t buf_len; 2800 rte_iova_t iova; 2801 void *buf; 2802 2803 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2804 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2805 2806 if (unlikely(total_len > UINT16_MAX)) 2807 return -ENOSPC; 2808 2809 buf_len = total_len; 2810 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2811 if (unlikely(buf == NULL)) 2812 return -ENOMEM; 2813 2814 /* Initialize shinfo */ 2815 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2816 virtio_dev_extbuf_free, buf); 2817 if (unlikely(shinfo == NULL)) { 2818 rte_free(buf); 2819 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n"); 2820 return -1; 2821 } 2822 2823 iova = rte_malloc_virt2iova(buf); 2824 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2825 rte_pktmbuf_reset_headroom(pkt); 2826 2827 return 0; 2828 } 2829 2830 /* 2831 * Prepare a host supported pktmbuf. 2832 */ 2833 static __rte_always_inline int 2834 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2835 uint32_t data_len) 2836 { 2837 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2838 return 0; 2839 2840 /* attach an external buffer if supported */ 2841 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 2842 return 0; 2843 2844 /* check if chained buffers are allowed */ 2845 if (!dev->linearbuf) 2846 return 0; 2847 2848 return -1; 2849 } 2850 2851 __rte_always_inline 2852 static uint16_t 2853 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2854 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2855 bool legacy_ol_flags) 2856 { 2857 uint16_t i; 2858 uint16_t avail_entries; 2859 uint16_t dropped = 0; 2860 static bool allocerr_warned; 2861 2862 /* 2863 * The ordering between avail index and 2864 * desc reads needs to be enforced. 
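	 * Otherwise descriptor entries could be read before the guest's
	 * avail index update is observed, yielding stale data.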
2865 */ 2866 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2867 vq->last_avail_idx; 2868 if (avail_entries == 0) 2869 return 0; 2870 2871 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2872 2873 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2874 2875 count = RTE_MIN(count, MAX_PKT_BURST); 2876 count = RTE_MIN(count, avail_entries); 2877 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 2878 2879 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2880 return 0; 2881 2882 for (i = 0; i < count; i++) { 2883 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2884 uint16_t head_idx; 2885 uint32_t buf_len; 2886 uint16_t nr_vec = 0; 2887 int err; 2888 2889 if (unlikely(fill_vec_buf_split(dev, vq, 2890 vq->last_avail_idx + i, 2891 &nr_vec, buf_vec, 2892 &head_idx, &buf_len, 2893 VHOST_ACCESS_RO) < 0)) 2894 break; 2895 2896 update_shadow_used_ring_split(vq, head_idx, 0); 2897 2898 if (unlikely(buf_len <= dev->vhost_hlen)) { 2899 dropped += 1; 2900 i++; 2901 break; 2902 } 2903 2904 buf_len -= dev->vhost_hlen; 2905 2906 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2907 if (unlikely(err)) { 2908 /* 2909 * mbuf allocation fails for jumbo packets when external 2910 * buffer allocation is not allowed and linear buffer 2911 * is required. Drop this packet. 2912 */ 2913 if (!allocerr_warned) { 2914 VHOST_LOG_DATA(dev->ifname, ERR, 2915 "failed mbuf alloc of size %d from %s.\n", 2916 buf_len, mbuf_pool->name); 2917 allocerr_warned = true; 2918 } 2919 dropped += 1; 2920 i++; 2921 break; 2922 } 2923 2924 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2925 mbuf_pool, legacy_ol_flags, 0, false); 2926 if (unlikely(err)) { 2927 if (!allocerr_warned) { 2928 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 2929 allocerr_warned = true; 2930 } 2931 dropped += 1; 2932 i++; 2933 break; 2934 } 2935 2936 } 2937 2938 if (dropped) 2939 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2940 2941 vq->last_avail_idx += i; 2942 2943 do_data_copy_dequeue(vq); 2944 if (unlikely(i < count)) 2945 vq->shadow_used_idx = i; 2946 if (likely(vq->shadow_used_idx)) { 2947 flush_shadow_used_ring_split(dev, vq); 2948 vhost_vring_call_split(dev, vq); 2949 } 2950 2951 return (i - dropped); 2952 } 2953 2954 __rte_noinline 2955 static uint16_t 2956 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2957 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2958 struct rte_mbuf **pkts, uint16_t count) 2959 { 2960 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2961 } 2962 2963 __rte_noinline 2964 static uint16_t 2965 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2966 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2967 struct rte_mbuf **pkts, uint16_t count) 2968 { 2969 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2970 } 2971 2972 static __rte_always_inline int 2973 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2974 struct vhost_virtqueue *vq, 2975 struct rte_mbuf **pkts, 2976 uint16_t avail_idx, 2977 uintptr_t *desc_addrs, 2978 uint16_t *ids) 2979 { 2980 bool wrap = vq->avail_wrap_counter; 2981 struct vring_packed_desc *descs = vq->desc_packed; 2982 uint64_t lens[PACKED_BATCH_SIZE]; 2983 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2984 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2985 uint16_t flags, i; 2986 2987 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2988 return -1; 2989 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2990 return 
-1; 2991 2992 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2993 flags = descs[avail_idx + i].flags; 2994 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2995 (wrap == !!(flags & VRING_DESC_F_USED)) || 2996 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2997 return -1; 2998 } 2999 3000 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3001 3002 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3003 lens[i] = descs[avail_idx + i].len; 3004 3005 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3006 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3007 descs[avail_idx + i].addr, 3008 &lens[i], VHOST_ACCESS_RW); 3009 } 3010 3011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3012 if (unlikely(!desc_addrs[i])) 3013 return -1; 3014 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3015 return -1; 3016 } 3017 3018 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3019 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3020 goto err; 3021 } 3022 3023 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3024 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3025 3026 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3027 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3028 goto err; 3029 } 3030 3031 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3032 pkts[i]->pkt_len = lens[i] - buf_offset; 3033 pkts[i]->data_len = pkts[i]->pkt_len; 3034 ids[i] = descs[avail_idx + i].id; 3035 } 3036 3037 return 0; 3038 3039 err: 3040 return -1; 3041 } 3042 3043 static __rte_always_inline int 3044 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3045 struct vhost_virtqueue *vq, 3046 struct rte_mbuf **pkts, 3047 bool legacy_ol_flags) 3048 { 3049 uint16_t avail_idx = vq->last_avail_idx; 3050 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3051 struct virtio_net_hdr *hdr; 3052 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3053 uint16_t ids[PACKED_BATCH_SIZE]; 3054 uint16_t i; 3055 3056 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3057 desc_addrs, ids)) 3058 return -1; 3059 3060 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3061 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3062 3063 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3064 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3065 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3066 pkts[i]->pkt_len); 3067 3068 if (virtio_net_with_host_offload(dev)) { 3069 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3070 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3071 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3072 } 3073 } 3074 3075 if (virtio_net_is_inorder(dev)) 3076 vhost_shadow_dequeue_batch_packed_inorder(vq, 3077 ids[PACKED_BATCH_SIZE - 1]); 3078 else 3079 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3080 3081 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3082 3083 return 0; 3084 } 3085 3086 static __rte_always_inline int 3087 vhost_dequeue_single_packed(struct virtio_net *dev, 3088 struct vhost_virtqueue *vq, 3089 struct rte_mempool *mbuf_pool, 3090 struct rte_mbuf *pkts, 3091 uint16_t *buf_id, 3092 uint16_t *desc_count, 3093 bool legacy_ol_flags) 3094 { 3095 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3096 uint32_t buf_len; 3097 uint16_t nr_vec = 0; 3098 int err; 3099 static bool allocerr_warned; 3100 3101 if (unlikely(fill_vec_buf_packed(dev, vq, 3102 vq->last_avail_idx, desc_count, 3103 buf_vec, &nr_vec, 3104 buf_id, &buf_len, 3105 VHOST_ACCESS_RO) < 0)) 3106 return -1; 3107 3108 if (unlikely(buf_len <= dev->vhost_hlen)) 3109 return -1; 3110 3111 buf_len 
-= dev->vhost_hlen; 3112 3113 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3114 if (!allocerr_warned) { 3115 VHOST_LOG_DATA(dev->ifname, ERR, 3116 "failed mbuf alloc of size %d from %s.\n", 3117 buf_len, mbuf_pool->name); 3118 allocerr_warned = true; 3119 } 3120 return -1; 3121 } 3122 3123 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3124 mbuf_pool, legacy_ol_flags, 0, false); 3125 if (unlikely(err)) { 3126 if (!allocerr_warned) { 3127 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3128 allocerr_warned = true; 3129 } 3130 return -1; 3131 } 3132 3133 return 0; 3134 } 3135 3136 static __rte_always_inline int 3137 virtio_dev_tx_single_packed(struct virtio_net *dev, 3138 struct vhost_virtqueue *vq, 3139 struct rte_mempool *mbuf_pool, 3140 struct rte_mbuf *pkts, 3141 bool legacy_ol_flags) 3142 { 3143 3144 uint16_t buf_id, desc_count = 0; 3145 int ret; 3146 3147 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3148 &desc_count, legacy_ol_flags); 3149 3150 if (likely(desc_count > 0)) { 3151 if (virtio_net_is_inorder(dev)) 3152 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3153 desc_count); 3154 else 3155 vhost_shadow_dequeue_single_packed(vq, buf_id, 3156 desc_count); 3157 3158 vq_inc_last_avail_packed(vq, desc_count); 3159 } 3160 3161 return ret; 3162 } 3163 3164 __rte_always_inline 3165 static uint16_t 3166 virtio_dev_tx_packed(struct virtio_net *dev, 3167 struct vhost_virtqueue *__rte_restrict vq, 3168 struct rte_mempool *mbuf_pool, 3169 struct rte_mbuf **__rte_restrict pkts, 3170 uint32_t count, 3171 bool legacy_ol_flags) 3172 { 3173 uint32_t pkt_idx = 0; 3174 3175 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3176 return 0; 3177 3178 do { 3179 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3180 3181 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3182 if (!virtio_dev_tx_batch_packed(dev, vq, 3183 &pkts[pkt_idx], 3184 legacy_ol_flags)) { 3185 pkt_idx += PACKED_BATCH_SIZE; 3186 continue; 3187 } 3188 } 3189 3190 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3191 pkts[pkt_idx], 3192 legacy_ol_flags)) 3193 break; 3194 pkt_idx++; 3195 } while (pkt_idx < count); 3196 3197 if (pkt_idx != count) 3198 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3199 3200 if (vq->shadow_used_idx) { 3201 do_data_copy_dequeue(vq); 3202 3203 vhost_flush_dequeue_shadow_packed(dev, vq); 3204 vhost_vring_call_packed(dev, vq); 3205 } 3206 3207 return pkt_idx; 3208 } 3209 3210 __rte_noinline 3211 static uint16_t 3212 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3213 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3214 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3215 { 3216 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3217 } 3218 3219 __rte_noinline 3220 static uint16_t 3221 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3222 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3223 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3224 { 3225 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3226 } 3227 3228 uint16_t 3229 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3230 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3231 { 3232 struct virtio_net *dev; 3233 struct rte_mbuf *rarp_mbuf = NULL; 3234 struct vhost_virtqueue *vq; 3235 int16_t success = 1; 3236 3237 dev = get_device(vid); 3238 if (!dev) 3239 return 0; 3240 3241 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 
		VHOST_LOG_DATA(dev->ifname, ERR,
			"%s: built-in vhost net backend is disabled.\n",
			__func__);
		return 0;
	}

	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(dev->ifname, ERR,
			"%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(!vq->enabled)) {
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so that it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write when it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing the compare and exchange if the read indicates
	 * it is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it to the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
3303 */ 3304 pkts[0] = rarp_mbuf; 3305 vhost_queue_stats_update(dev, vq, pkts, 1); 3306 pkts++; 3307 count -= 1; 3308 } 3309 3310 if (vq_is_packed(dev)) { 3311 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3312 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3313 else 3314 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3315 } else { 3316 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3317 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3318 else 3319 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3320 } 3321 3322 vhost_queue_stats_update(dev, vq, pkts, count); 3323 3324 out: 3325 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3326 vhost_user_iotlb_rd_unlock(vq); 3327 3328 out_access_unlock: 3329 rte_spinlock_unlock(&vq->access_lock); 3330 3331 if (unlikely(rarp_mbuf != NULL)) 3332 count += 1; 3333 3334 return count; 3335 } 3336 3337 static __rte_always_inline uint16_t 3338 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3339 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3340 uint16_t vchan_id, bool legacy_ol_flags) 3341 { 3342 uint16_t start_idx, from, i; 3343 uint16_t nr_cpl_pkts = 0; 3344 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3345 3346 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3347 3348 start_idx = async_get_first_inflight_pkt_idx(vq); 3349 3350 from = start_idx; 3351 while (vq->async->pkts_cmpl_flag[from] && count--) { 3352 vq->async->pkts_cmpl_flag[from] = false; 3353 from = (from + 1) % vq->size; 3354 nr_cpl_pkts++; 3355 } 3356 3357 if (nr_cpl_pkts == 0) 3358 return 0; 3359 3360 for (i = 0; i < nr_cpl_pkts; i++) { 3361 from = (start_idx + i) % vq->size; 3362 pkts[i] = pkts_info[from].mbuf; 3363 3364 if (virtio_net_with_host_offload(dev)) 3365 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i], 3366 legacy_ol_flags); 3367 } 3368 3369 /* write back completed descs to used ring and update used idx */ 3370 if (vq_is_packed(dev)) { 3371 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3372 vhost_vring_call_packed(dev, vq); 3373 } else { 3374 write_back_completed_descs_split(vq, nr_cpl_pkts); 3375 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE); 3376 vhost_vring_call_split(dev, vq); 3377 } 3378 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3379 3380 return nr_cpl_pkts; 3381 } 3382 3383 static __rte_always_inline uint16_t 3384 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3385 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3386 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3387 { 3388 static bool allocerr_warned; 3389 bool dropped = false; 3390 uint16_t avail_entries; 3391 uint16_t pkt_idx, slot_idx = 0; 3392 uint16_t nr_done_pkts = 0; 3393 uint16_t pkt_err = 0; 3394 uint16_t n_xfer; 3395 struct vhost_async *async = vq->async; 3396 struct async_inflight_info *pkts_info = async->pkts_info; 3397 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3398 uint16_t pkts_size = count; 3399 3400 /** 3401 * The ordering between avail index and 3402 * desc reads needs to be enforced. 
3403 */ 3404 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3405 vq->last_avail_idx; 3406 if (avail_entries == 0) 3407 goto out; 3408 3409 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3410 3411 async_iter_reset(async); 3412 3413 count = RTE_MIN(count, MAX_PKT_BURST); 3414 count = RTE_MIN(count, avail_entries); 3415 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3416 3417 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3418 goto out; 3419 3420 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3421 uint16_t head_idx = 0; 3422 uint16_t nr_vec = 0; 3423 uint16_t to; 3424 uint32_t buf_len; 3425 int err; 3426 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3427 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3428 3429 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3430 &nr_vec, buf_vec, 3431 &head_idx, &buf_len, 3432 VHOST_ACCESS_RO) < 0)) { 3433 dropped = true; 3434 break; 3435 } 3436 3437 if (unlikely(buf_len <= dev->vhost_hlen)) { 3438 dropped = true; 3439 break; 3440 } 3441 3442 buf_len -= dev->vhost_hlen; 3443 3444 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3445 if (unlikely(err)) { 3446 /** 3447 * mbuf allocation fails for jumbo packets when external 3448 * buffer allocation is not allowed and linear buffer 3449 * is required. Drop this packet. 3450 */ 3451 if (!allocerr_warned) { 3452 VHOST_LOG_DATA(dev->ifname, ERR, 3453 "%s: Failed mbuf alloc of size %d from %s\n", 3454 __func__, buf_len, mbuf_pool->name); 3455 allocerr_warned = true; 3456 } 3457 dropped = true; 3458 slot_idx--; 3459 break; 3460 } 3461 3462 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3463 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3464 legacy_ol_flags, slot_idx, true); 3465 if (unlikely(err)) { 3466 if (!allocerr_warned) { 3467 VHOST_LOG_DATA(dev->ifname, ERR, 3468 "%s: Failed to offload copies to async channel.\n", 3469 __func__); 3470 allocerr_warned = true; 3471 } 3472 dropped = true; 3473 break; 3474 } 3475 3476 pkts_info[slot_idx].mbuf = pkt; 3477 3478 /* store used descs */ 3479 to = async->desc_idx_split & (vq->size - 1); 3480 async->descs_split[to].id = head_idx; 3481 async->descs_split[to].len = 0; 3482 async->desc_idx_split++; 3483 3484 vq->last_avail_idx++; 3485 } 3486 3487 if (unlikely(dropped)) 3488 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3489 3490 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3491 async->iov_iter, pkt_idx); 3492 3493 async->pkts_inflight_n += n_xfer; 3494 3495 pkt_err = pkt_idx - n_xfer; 3496 if (unlikely(pkt_err)) { 3497 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n", 3498 __func__); 3499 3500 pkt_idx = n_xfer; 3501 /* recover available ring */ 3502 vq->last_avail_idx -= pkt_err; 3503 3504 /** 3505 * recover async channel copy related structures and free pktmbufs 3506 * for error pkts. 3507 */ 3508 async->desc_idx_split -= pkt_err; 3509 while (pkt_err-- > 0) { 3510 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3511 slot_idx--; 3512 } 3513 } 3514 3515 async->pkts_idx += pkt_idx; 3516 if (async->pkts_idx >= vq->size) 3517 async->pkts_idx -= vq->size; 3518 3519 out: 3520 /* DMA device may serve other queues, unconditionally check completed. 
*/ 3521 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3522 dma_id, vchan_id, legacy_ol_flags); 3523 3524 return nr_done_pkts; 3525 } 3526 3527 __rte_noinline 3528 static uint16_t 3529 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3530 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3531 struct rte_mbuf **pkts, uint16_t count, 3532 int16_t dma_id, uint16_t vchan_id) 3533 { 3534 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3535 pkts, count, dma_id, vchan_id, true); 3536 } 3537 3538 __rte_noinline 3539 static uint16_t 3540 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3541 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3542 struct rte_mbuf **pkts, uint16_t count, 3543 int16_t dma_id, uint16_t vchan_id) 3544 { 3545 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3546 pkts, count, dma_id, vchan_id, false); 3547 } 3548 3549 static __rte_always_inline void 3550 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 3551 uint16_t buf_id, uint16_t count) 3552 { 3553 struct vhost_async *async = vq->async; 3554 uint16_t idx = async->buffer_idx_packed; 3555 3556 async->buffers_packed[idx].id = buf_id; 3557 async->buffers_packed[idx].len = 0; 3558 async->buffers_packed[idx].count = count; 3559 3560 async->buffer_idx_packed++; 3561 if (async->buffer_idx_packed >= vq->size) 3562 async->buffer_idx_packed -= vq->size; 3563 3564 } 3565 3566 static __rte_always_inline int 3567 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3568 struct vhost_virtqueue *vq, 3569 struct rte_mempool *mbuf_pool, 3570 struct rte_mbuf *pkts, 3571 uint16_t slot_idx, 3572 bool legacy_ol_flags) 3573 { 3574 int err; 3575 uint16_t buf_id, desc_count = 0; 3576 uint16_t nr_vec = 0; 3577 uint32_t buf_len; 3578 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3579 struct vhost_async *async = vq->async; 3580 struct async_inflight_info *pkts_info = async->pkts_info; 3581 static bool allocerr_warned; 3582 3583 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3584 buf_vec, &nr_vec, &buf_id, &buf_len, 3585 VHOST_ACCESS_RO) < 0)) 3586 return -1; 3587 3588 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3589 if (!allocerr_warned) { 3590 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n", 3591 buf_len, mbuf_pool->name); 3592 3593 allocerr_warned = true; 3594 } 3595 return -1; 3596 } 3597 3598 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3599 legacy_ol_flags, slot_idx, true); 3600 if (unlikely(err)) { 3601 rte_pktmbuf_free(pkts); 3602 if (!allocerr_warned) { 3603 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf on.\n"); 3604 allocerr_warned = true; 3605 } 3606 return -1; 3607 } 3608 3609 pkts_info[slot_idx].descs = desc_count; 3610 3611 /* update async shadow packed ring */ 3612 vhost_async_shadow_dequeue_single_packed(vq, buf_id, desc_count); 3613 3614 vq_inc_last_avail_packed(vq, desc_count); 3615 3616 return err; 3617 } 3618 3619 static __rte_always_inline uint16_t 3620 virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3621 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3622 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3623 { 3624 uint16_t pkt_idx; 3625 uint16_t slot_idx = 0; 3626 uint16_t nr_done_pkts = 0; 3627 uint16_t pkt_err = 0; 3628 uint32_t n_xfer; 3629 struct vhost_async *async = vq->async; 3630 struct async_inflight_info *pkts_info = async->pkts_info; 3631 struct rte_mbuf 
*pkts_prealloc[MAX_PKT_BURST]; 3632 3633 VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); 3634 3635 async_iter_reset(async); 3636 3637 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3638 goto out; 3639 3640 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3641 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3642 3643 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3644 3645 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 3646 if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt, 3647 slot_idx, legacy_ol_flags))) { 3648 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3649 3650 if (slot_idx == 0) 3651 slot_idx = vq->size - 1; 3652 else 3653 slot_idx--; 3654 3655 break; 3656 } 3657 3658 pkts_info[slot_idx].mbuf = pkt; 3659 } 3660 3661 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3662 async->iov_iter, pkt_idx); 3663 3664 async->pkts_inflight_n += n_xfer; 3665 3666 pkt_err = pkt_idx - n_xfer; 3667 3668 if (unlikely(pkt_err)) { 3669 uint16_t descs_err = 0; 3670 3671 pkt_idx -= pkt_err; 3672 3673 /** 3674 * recover DMA-copy related structures and free pktmbuf for DMA-error pkts. 3675 */ 3676 if (async->buffer_idx_packed >= pkt_err) 3677 async->buffer_idx_packed -= pkt_err; 3678 else 3679 async->buffer_idx_packed += vq->size - pkt_err; 3680 3681 while (pkt_err-- > 0) { 3682 rte_pktmbuf_free(pkts_info[slot_idx].mbuf); 3683 descs_err += pkts_info[slot_idx].descs; 3684 3685 if (slot_idx == 0) 3686 slot_idx = vq->size - 1; 3687 else 3688 slot_idx--; 3689 } 3690 3691 /* recover available ring */ 3692 if (vq->last_avail_idx >= descs_err) { 3693 vq->last_avail_idx -= descs_err; 3694 } else { 3695 vq->last_avail_idx += vq->size - descs_err; 3696 vq->avail_wrap_counter ^= 1; 3697 } 3698 } 3699 3700 async->pkts_idx += pkt_idx; 3701 if (async->pkts_idx >= vq->size) 3702 async->pkts_idx -= vq->size; 3703 3704 out: 3705 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count, 3706 dma_id, vchan_id, legacy_ol_flags); 3707 3708 return nr_done_pkts; 3709 } 3710 3711 __rte_noinline 3712 static uint16_t 3713 virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, 3714 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3715 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3716 { 3717 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3718 pkts, count, dma_id, vchan_id, true); 3719 } 3720 3721 __rte_noinline 3722 static uint16_t 3723 virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, 3724 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3725 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 3726 { 3727 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 3728 pkts, count, dma_id, vchan_id, false); 3729 } 3730 3731 uint16_t 3732 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, 3733 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3734 int *nr_inflight, int16_t dma_id, uint16_t vchan_id) 3735 { 3736 struct virtio_net *dev; 3737 struct rte_mbuf *rarp_mbuf = NULL; 3738 struct vhost_virtqueue *vq; 3739 int16_t success = 1; 3740 3741 dev = get_device(vid); 3742 if (!dev || !nr_inflight) 3743 return 0; 3744 3745 *nr_inflight = -1; 3746 3747 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3748 VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n", 3749 __func__); 3750 return 0; 3751 } 3752 3753 if 
(unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
			__func__, dma_id);
		return 0;
	}

	if (unlikely(!dma_copy_track[dma_id].vchans ||
			!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
			__func__, dma_id, vchan_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(vq->enabled == 0)) {
		count = 0;
		goto out_access_unlock;
	}

	if (unlikely(!vq->async)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
			__func__, queue_id);
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(vq->access_ok == 0))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so that it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write when it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing the compare and exchange if the read indicates
	 * it is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it to the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		pkts[0] = rarp_mbuf;
		vhost_queue_stats_update(dev, vq, pkts, 1);
		pkts++;
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	}

	*nr_inflight = vq->async->pkts_inflight_n;
	vhost_queue_stats_update(dev, vq, pkts, count);

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL))
		count += 1;

	return count;
}
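
/*
 * Illustrative usage sketch (not part of the library): one way an application
 * might drive the DMA-accelerated enqueue path exported above. It assumes the
 * application already configured a DMA device for vhost use with
 * rte_vhost_async_dma_configure() and enabled async mode on the queue with
 * rte_vhost_async_channel_register(); "vid", "dma_id", "mbufs" and "nb_pkts"
 * are placeholders supplied by the application.
 *
 *	uint16_t n_enq, n_cpl;
 *	struct rte_mbuf *done[32];
 *
 *	// Hand the burst to the DMA engine; the copies run asynchronously and
 *	// the mbufs must not be freed or reused until completion is polled.
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, 0, mbufs, nb_pkts,
 *			dma_id, 0);
 *
 *	// Later, harvest finished copies so the guest gets notified and the
 *	// mbufs can be released.
 *	n_cpl = rte_vhost_poll_enqueue_completed(vid, 0, done, 32, dma_id, 0);
 *	rte_pktmbuf_free_bulk(done, n_cpl);
 *
 * The dequeue direction follows the same pattern through
 * rte_vhost_async_try_dequeue_burst(), which both submits new copies and
 * returns the packets whose copies have already completed.
 */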