/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_net.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_dmadev.h>
#include <rte_vhost.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_vhost_async.h>

#include "iotlb.h"
#include "vhost.h"

#define MAX_BATCH_LEN 256

static __rte_always_inline uint16_t
async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
		uint16_t vchan_id, bool legacy_ol_flags);

/* DMA device copy operation tracking array. */
struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];

static __rte_always_inline bool
rxvq_is_mergeable(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
}

static __rte_always_inline bool
virtio_net_is_inorder(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
}

static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}

/*
 * This function must be called with virtqueue's access_lock taken.
 */
static inline void
vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtqueue_stats *stats = &vq->stats;
	int i;

	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
		return;

	for (i = 0; i < count; i++) {
		struct rte_ether_addr *ea;
		struct rte_mbuf *pkt = pkts[i];
		uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt);

		stats->packets++;
		stats->bytes += pkt_len;

		if (pkt_len == 64) {
			stats->size_bins[1]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			uint32_t bin;

			/* count leading zeros to index the matching power-of-two bin */
			bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
			stats->size_bins[bin]++;
		} else {
			if (pkt_len < 64)
				stats->size_bins[0]++;
			else if (pkt_len < 1519)
				stats->size_bins[6]++;
			else
				stats->size_bins[7]++;
		}

		ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *);
		if (rte_is_multicast_ether_addr(ea)) {
			if (rte_is_broadcast_ether_addr(ea))
				stats->broadcast++;
			else
				stats->multicast++;
		}
	}
}

static __rte_always_inline int64_t
vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
		int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx,
		struct vhost_iov_iter *pkt)
{
	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
	uint16_t ring_mask = dma_info->ring_mask;
	static bool vhost_async_dma_copy_log;

	struct vhost_iovec *iov = pkt->iov;
	int copy_idx = 0;
	uint32_t nr_segs = pkt->nr_segs;
	uint16_t i;

	if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs)
		return -1;

	for (i = 0; i < nr_segs; i++) {
		copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,
				(rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);
		/**
		 * Since all memory is pinned and DMA vChannel
		 * ring has enough space, failure should be a
		 * rare case.
If failure happens, it means DMA 128 * device encounters serious errors; in this 129 * case, please stop async data-path and check 130 * what has happened to DMA device. 131 */ 132 if (unlikely(copy_idx < 0)) { 133 if (!vhost_async_dma_copy_log) { 134 VHOST_LOG_DATA(dev->ifname, ERR, 135 "DMA copy failed for channel %d:%u\n", 136 dma_id, vchan_id); 137 vhost_async_dma_copy_log = true; 138 } 139 return -1; 140 } 141 } 142 143 /** 144 * Only store packet completion flag address in the last copy's 145 * slot, and other slots are set to NULL. 146 */ 147 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx]; 148 149 return nr_segs; 150 } 151 152 static __rte_always_inline uint16_t 153 vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, 154 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, 155 struct vhost_iov_iter *pkts, uint16_t nr_pkts) 156 { 157 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 158 int64_t ret, nr_copies = 0; 159 uint16_t pkt_idx; 160 161 rte_spinlock_lock(&dma_info->dma_lock); 162 163 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { 164 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, 165 &pkts[pkt_idx]); 166 if (unlikely(ret < 0)) 167 break; 168 169 nr_copies += ret; 170 head_idx++; 171 if (head_idx >= vq->size) 172 head_idx -= vq->size; 173 } 174 175 if (likely(nr_copies > 0)) 176 rte_dma_submit(dma_id, vchan_id); 177 178 rte_spinlock_unlock(&dma_info->dma_lock); 179 180 return pkt_idx; 181 } 182 183 static __rte_always_inline uint16_t 184 vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id, 185 uint16_t max_pkts) 186 { 187 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 188 uint16_t ring_mask = dma_info->ring_mask; 189 uint16_t last_idx = 0; 190 uint16_t nr_copies; 191 uint16_t copy_idx; 192 uint16_t i; 193 bool has_error = false; 194 static bool vhost_async_dma_complete_log; 195 196 rte_spinlock_lock(&dma_info->dma_lock); 197 198 /** 199 * Print error log for debugging, if DMA reports error during 200 * DMA transfer. We do not handle error in vhost level. 201 */ 202 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error); 203 if (unlikely(!vhost_async_dma_complete_log && has_error)) { 204 VHOST_LOG_DATA(dev->ifname, ERR, 205 "DMA completion failure on channel %d:%u\n", 206 dma_id, vchan_id); 207 vhost_async_dma_complete_log = true; 208 } else if (nr_copies == 0) { 209 goto out; 210 } 211 212 copy_idx = last_idx - nr_copies + 1; 213 for (i = 0; i < nr_copies; i++) { 214 bool *flag; 215 216 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask]; 217 if (flag) { 218 /** 219 * Mark the packet flag as received. The flag 220 * could belong to another virtqueue but write 221 * is atomic. 
222 */ 223 *flag = true; 224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 225 } 226 copy_idx++; 227 } 228 229 out: 230 rte_spinlock_unlock(&dma_info->dma_lock); 231 return nr_copies; 232 } 233 234 static inline void 235 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 236 { 237 struct batch_copy_elem *elem = vq->batch_copy_elems; 238 uint16_t count = vq->batch_copy_nb_elems; 239 int i; 240 241 for (i = 0; i < count; i++) { 242 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 243 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 244 elem[i].len); 245 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 246 } 247 248 vq->batch_copy_nb_elems = 0; 249 } 250 251 static inline void 252 do_data_copy_dequeue(struct vhost_virtqueue *vq) 253 { 254 struct batch_copy_elem *elem = vq->batch_copy_elems; 255 uint16_t count = vq->batch_copy_nb_elems; 256 int i; 257 258 for (i = 0; i < count; i++) 259 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 260 261 vq->batch_copy_nb_elems = 0; 262 } 263 264 static __rte_always_inline void 265 do_flush_shadow_used_ring_split(struct virtio_net *dev, 266 struct vhost_virtqueue *vq, 267 uint16_t to, uint16_t from, uint16_t size) 268 { 269 rte_memcpy(&vq->used->ring[to], 270 &vq->shadow_used_split[from], 271 size * sizeof(struct vring_used_elem)); 272 vhost_log_cache_used_vring(dev, vq, 273 offsetof(struct vring_used, ring[to]), 274 size * sizeof(struct vring_used_elem)); 275 } 276 277 static __rte_always_inline void 278 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 279 { 280 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 281 282 if (used_idx + vq->shadow_used_idx <= vq->size) { 283 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 284 vq->shadow_used_idx); 285 } else { 286 uint16_t size; 287 288 /* update used ring interval [used_idx, vq->size] */ 289 size = vq->size - used_idx; 290 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 291 292 /* update the left half used ring interval [0, left_size] */ 293 do_flush_shadow_used_ring_split(dev, vq, 0, size, 294 vq->shadow_used_idx - size); 295 } 296 vq->last_used_idx += vq->shadow_used_idx; 297 298 vhost_log_cache_sync(dev, vq); 299 300 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 301 __ATOMIC_RELEASE); 302 vq->shadow_used_idx = 0; 303 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 304 sizeof(vq->used->idx)); 305 } 306 307 static __rte_always_inline void 308 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 309 uint16_t desc_idx, uint32_t len) 310 { 311 uint16_t i = vq->shadow_used_idx++; 312 313 vq->shadow_used_split[i].id = desc_idx; 314 vq->shadow_used_split[i].len = len; 315 } 316 317 static __rte_always_inline void 318 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 319 struct vhost_virtqueue *vq) 320 { 321 int i; 322 uint16_t used_idx = vq->last_used_idx; 323 uint16_t head_idx = vq->last_used_idx; 324 uint16_t head_flags = 0; 325 326 /* Split loop in two to save memory barriers */ 327 for (i = 0; i < vq->shadow_used_idx; i++) { 328 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 329 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 330 331 used_idx += vq->shadow_used_packed[i].count; 332 if (used_idx >= vq->size) 333 used_idx -= vq->size; 334 } 335 336 /* The ordering for storing desc flags needs to be enforced. 
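	 * The id/len stores done in the first loop above must be
	 * visible before the flags stores below: the guest reads the
	 * flags first to decide whether a used element is valid, so
	 * publishing the flags early could expose stale id/len values.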
*/ 337 rte_atomic_thread_fence(__ATOMIC_RELEASE); 338 339 for (i = 0; i < vq->shadow_used_idx; i++) { 340 uint16_t flags; 341 342 if (vq->shadow_used_packed[i].len) 343 flags = VRING_DESC_F_WRITE; 344 else 345 flags = 0; 346 347 if (vq->used_wrap_counter) { 348 flags |= VRING_DESC_F_USED; 349 flags |= VRING_DESC_F_AVAIL; 350 } else { 351 flags &= ~VRING_DESC_F_USED; 352 flags &= ~VRING_DESC_F_AVAIL; 353 } 354 355 if (i > 0) { 356 vq->desc_packed[vq->last_used_idx].flags = flags; 357 358 vhost_log_cache_used_vring(dev, vq, 359 vq->last_used_idx * 360 sizeof(struct vring_packed_desc), 361 sizeof(struct vring_packed_desc)); 362 } else { 363 head_idx = vq->last_used_idx; 364 head_flags = flags; 365 } 366 367 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 368 } 369 370 vq->desc_packed[head_idx].flags = head_flags; 371 372 vhost_log_cache_used_vring(dev, vq, 373 head_idx * 374 sizeof(struct vring_packed_desc), 375 sizeof(struct vring_packed_desc)); 376 377 vq->shadow_used_idx = 0; 378 vhost_log_cache_sync(dev, vq); 379 } 380 381 static __rte_always_inline void 382 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 383 struct vhost_virtqueue *vq) 384 { 385 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 386 387 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 388 /* desc flags is the synchronization point for virtio packed vring */ 389 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 390 used_elem->flags, __ATOMIC_RELEASE); 391 392 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 393 sizeof(struct vring_packed_desc), 394 sizeof(struct vring_packed_desc)); 395 vq->shadow_used_idx = 0; 396 vhost_log_cache_sync(dev, vq); 397 } 398 399 static __rte_always_inline void 400 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 401 struct vhost_virtqueue *vq, 402 uint64_t *lens, 403 uint16_t *ids) 404 { 405 uint16_t i; 406 uint16_t flags; 407 uint16_t last_used_idx; 408 struct vring_packed_desc *desc_base; 409 410 last_used_idx = vq->last_used_idx; 411 desc_base = &vq->desc_packed[last_used_idx]; 412 413 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 414 415 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 416 desc_base[i].id = ids[i]; 417 desc_base[i].len = lens[i]; 418 } 419 420 rte_atomic_thread_fence(__ATOMIC_RELEASE); 421 422 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 423 desc_base[i].flags = flags; 424 } 425 426 vhost_log_cache_used_vring(dev, vq, last_used_idx * 427 sizeof(struct vring_packed_desc), 428 sizeof(struct vring_packed_desc) * 429 PACKED_BATCH_SIZE); 430 vhost_log_cache_sync(dev, vq); 431 432 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 433 } 434 435 static __rte_always_inline void 436 vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq, 437 uint64_t *lens, 438 uint16_t *ids) 439 { 440 uint16_t i; 441 struct vhost_async *async = vq->async; 442 443 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 444 async->buffers_packed[async->buffer_idx_packed].id = ids[i]; 445 async->buffers_packed[async->buffer_idx_packed].len = lens[i]; 446 async->buffers_packed[async->buffer_idx_packed].count = 1; 447 async->buffer_idx_packed++; 448 if (async->buffer_idx_packed >= vq->size) 449 async->buffer_idx_packed -= vq->size; 450 } 451 } 452 453 static __rte_always_inline void 454 vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t *ids) 455 { 456 uint16_t i; 457 struct vhost_async *async = vq->async; 458 459 vhost_for_each_try_unroll(i, 0, 
PACKED_BATCH_SIZE) { 460 async->buffers_packed[async->buffer_idx_packed].id = ids[i]; 461 async->buffers_packed[async->buffer_idx_packed].len = 0; 462 async->buffers_packed[async->buffer_idx_packed].count = 1; 463 464 async->buffer_idx_packed++; 465 if (async->buffer_idx_packed >= vq->size) 466 async->buffer_idx_packed -= vq->size; 467 } 468 } 469 470 static __rte_always_inline void 471 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 472 uint16_t id) 473 { 474 vq->shadow_used_packed[0].id = id; 475 476 if (!vq->shadow_used_idx) { 477 vq->shadow_last_used_idx = vq->last_used_idx; 478 vq->shadow_used_packed[0].flags = 479 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 480 vq->shadow_used_packed[0].len = 0; 481 vq->shadow_used_packed[0].count = 1; 482 vq->shadow_used_idx++; 483 } 484 485 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 486 } 487 488 static __rte_always_inline void 489 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 490 struct vhost_virtqueue *vq, 491 uint16_t *ids) 492 { 493 uint16_t flags; 494 uint16_t i; 495 uint16_t begin; 496 497 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 498 499 if (!vq->shadow_used_idx) { 500 vq->shadow_last_used_idx = vq->last_used_idx; 501 vq->shadow_used_packed[0].id = ids[0]; 502 vq->shadow_used_packed[0].len = 0; 503 vq->shadow_used_packed[0].count = 1; 504 vq->shadow_used_packed[0].flags = flags; 505 vq->shadow_used_idx++; 506 begin = 1; 507 } else 508 begin = 0; 509 510 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 511 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 512 vq->desc_packed[vq->last_used_idx + i].len = 0; 513 } 514 515 rte_atomic_thread_fence(__ATOMIC_RELEASE); 516 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 517 vq->desc_packed[vq->last_used_idx + i].flags = flags; 518 519 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 520 sizeof(struct vring_packed_desc), 521 sizeof(struct vring_packed_desc) * 522 PACKED_BATCH_SIZE); 523 vhost_log_cache_sync(dev, vq); 524 525 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 526 } 527 528 static __rte_always_inline void 529 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 530 uint16_t buf_id, 531 uint16_t count) 532 { 533 uint16_t flags; 534 535 flags = vq->desc_packed[vq->last_used_idx].flags; 536 if (vq->used_wrap_counter) { 537 flags |= VRING_DESC_F_USED; 538 flags |= VRING_DESC_F_AVAIL; 539 } else { 540 flags &= ~VRING_DESC_F_USED; 541 flags &= ~VRING_DESC_F_AVAIL; 542 } 543 544 if (!vq->shadow_used_idx) { 545 vq->shadow_last_used_idx = vq->last_used_idx; 546 547 vq->shadow_used_packed[0].id = buf_id; 548 vq->shadow_used_packed[0].len = 0; 549 vq->shadow_used_packed[0].flags = flags; 550 vq->shadow_used_idx++; 551 } else { 552 vq->desc_packed[vq->last_used_idx].id = buf_id; 553 vq->desc_packed[vq->last_used_idx].len = 0; 554 vq->desc_packed[vq->last_used_idx].flags = flags; 555 } 556 557 vq_inc_last_used_packed(vq, count); 558 } 559 560 static __rte_always_inline void 561 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 562 uint16_t buf_id, 563 uint16_t count) 564 { 565 uint16_t flags; 566 567 vq->shadow_used_packed[0].id = buf_id; 568 569 flags = vq->desc_packed[vq->last_used_idx].flags; 570 if (vq->used_wrap_counter) { 571 flags |= VRING_DESC_F_USED; 572 flags |= VRING_DESC_F_AVAIL; 573 } else { 574 flags &= ~VRING_DESC_F_USED; 575 flags &= ~VRING_DESC_F_AVAIL; 576 } 577 578 if (!vq->shadow_used_idx) { 579 vq->shadow_last_used_idx = vq->last_used_idx; 580 
vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].flags = flags;
		vq->shadow_used_idx++;
	}

	vq_inc_last_used_packed(vq, count);
}

static __rte_always_inline void
vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
				uint32_t *len,
				uint16_t *id,
				uint16_t *count,
				uint16_t num_buffers)
{
	uint16_t i;

	for (i = 0; i < num_buffers; i++) {
		/* record the batch alignment so the shadow ring is flushed on PACKED_BATCH_SIZE boundaries */
		if (!vq->shadow_used_idx)
			vq->shadow_aligned_idx = vq->last_used_idx &
				PACKED_BATCH_MASK;
		vq->shadow_used_packed[vq->shadow_used_idx].id = id[i];
		vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
		vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
		vq->shadow_aligned_idx += count[i];
		vq->shadow_used_idx++;
	}
}

static __rte_always_inline void
vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq,
				uint32_t *len,
				uint16_t *id,
				uint16_t *count,
				uint16_t num_buffers)
{
	uint16_t i;
	struct vhost_async *async = vq->async;

	for (i = 0; i < num_buffers; i++) {
		async->buffers_packed[async->buffer_idx_packed].id = id[i];
		async->buffers_packed[async->buffer_idx_packed].len = len[i];
		async->buffers_packed[async->buffer_idx_packed].count = count[i];
		async->buffer_idx_packed++;
		if (async->buffer_idx_packed >= vq->size)
			async->buffer_idx_packed -= vq->size;
	}
}

static __rte_always_inline void
vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq,
				uint32_t *len,
				uint16_t *id,
				uint16_t *count,
				uint16_t num_buffers)
{
	vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);

	if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}
}

/* Skip the write when the value is unchanged, to avoid dirtying the cache line needlessly. */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
	uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK;

	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;

	if (csum_l4) {
		/*
		 * Pseudo-header checksum must be set as per Virtio spec.
		 *
		 * Note: We don't propagate rte_net_intel_cksum_prepare()
		 * errors, as it would have an impact on performance, and an
		 * error would mean the packet is dropped by the guest instead
		 * of being dropped here.
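		 *
		 * With VIRTIO_NET_HDR_F_NEEDS_CSUM the peer finishes the
		 * checksum over [csum_start, end of packet) and stores the
		 * result at csum_start + csum_offset, e.g. for TCP:
		 *   csum_start  = l2_len + l3_len
		 *   csum_offset = offsetof(struct rte_tcp_hdr, cksum)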
668 */ 669 rte_net_intel_cksum_prepare(m_buf); 670 671 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 672 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 673 674 switch (csum_l4) { 675 case RTE_MBUF_F_TX_TCP_CKSUM: 676 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 677 cksum)); 678 break; 679 case RTE_MBUF_F_TX_UDP_CKSUM: 680 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 681 dgram_cksum)); 682 break; 683 case RTE_MBUF_F_TX_SCTP_CKSUM: 684 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 685 cksum)); 686 break; 687 } 688 } else { 689 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 690 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 691 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 692 } 693 694 /* IP cksum verification cannot be bypassed, then calculate here */ 695 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 696 struct rte_ipv4_hdr *ipv4_hdr; 697 698 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 699 m_buf->l2_len); 700 ipv4_hdr->hdr_checksum = 0; 701 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 702 } 703 704 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 705 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 706 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 707 else 708 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 709 net_hdr->gso_size = m_buf->tso_segsz; 710 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 711 + m_buf->l4_len; 712 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 713 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 714 net_hdr->gso_size = m_buf->tso_segsz; 715 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 716 m_buf->l4_len; 717 } else { 718 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 719 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 720 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 721 } 722 } 723 724 static __rte_always_inline int 725 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 726 struct buf_vector *buf_vec, uint16_t *vec_idx, 727 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 728 { 729 uint16_t vec_id = *vec_idx; 730 731 while (desc_len) { 732 uint64_t desc_addr; 733 uint64_t desc_chunck_len = desc_len; 734 735 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 736 return -1; 737 738 desc_addr = vhost_iova_to_vva(dev, vq, 739 desc_iova, 740 &desc_chunck_len, 741 perm); 742 if (unlikely(!desc_addr)) 743 return -1; 744 745 rte_prefetch0((void *)(uintptr_t)desc_addr); 746 747 buf_vec[vec_id].buf_iova = desc_iova; 748 buf_vec[vec_id].buf_addr = desc_addr; 749 buf_vec[vec_id].buf_len = desc_chunck_len; 750 751 desc_len -= desc_chunck_len; 752 desc_iova += desc_chunck_len; 753 vec_id++; 754 } 755 *vec_idx = vec_id; 756 757 return 0; 758 } 759 760 static __rte_always_inline int 761 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 762 uint32_t avail_idx, uint16_t *vec_idx, 763 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 764 uint32_t *desc_chain_len, uint8_t perm) 765 { 766 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 767 uint16_t vec_id = *vec_idx; 768 uint32_t len = 0; 769 uint64_t dlen; 770 uint32_t nr_descs = vq->size; 771 uint32_t cnt = 0; 772 struct vring_desc *descs = vq->desc; 773 struct vring_desc *idesc = NULL; 774 775 if (unlikely(idx >= vq->size)) 776 return -1; 777 778 *desc_chain_head = idx; 779 780 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 781 dlen = vq->desc[idx].len; 782 nr_descs = dlen / sizeof(struct vring_desc); 783 if (unlikely(nr_descs > vq->size)) 784 return -1; 785 786 descs = (struct vring_desc *)(uintptr_t) 787 vhost_iova_to_vva(dev, vq, 
vq->desc[idx].addr, 788 &dlen, 789 VHOST_ACCESS_RO); 790 if (unlikely(!descs)) 791 return -1; 792 793 if (unlikely(dlen < vq->desc[idx].len)) { 794 /* 795 * The indirect desc table is not contiguous 796 * in process VA space, we have to copy it. 797 */ 798 idesc = vhost_alloc_copy_ind_table(dev, vq, 799 vq->desc[idx].addr, vq->desc[idx].len); 800 if (unlikely(!idesc)) 801 return -1; 802 803 descs = idesc; 804 } 805 806 idx = 0; 807 } 808 809 while (1) { 810 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 811 free_ind_table(idesc); 812 return -1; 813 } 814 815 dlen = descs[idx].len; 816 len += dlen; 817 818 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 819 descs[idx].addr, dlen, 820 perm))) { 821 free_ind_table(idesc); 822 return -1; 823 } 824 825 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 826 break; 827 828 idx = descs[idx].next; 829 } 830 831 *desc_chain_len = len; 832 *vec_idx = vec_id; 833 834 if (unlikely(!!idesc)) 835 free_ind_table(idesc); 836 837 return 0; 838 } 839 840 /* 841 * Returns -1 on fail, 0 on success 842 */ 843 static inline int 844 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 845 uint64_t size, struct buf_vector *buf_vec, 846 uint16_t *num_buffers, uint16_t avail_head, 847 uint16_t *nr_vec) 848 { 849 uint16_t cur_idx; 850 uint16_t vec_idx = 0; 851 uint16_t max_tries, tries = 0; 852 853 uint16_t head_idx = 0; 854 uint32_t len = 0; 855 856 *num_buffers = 0; 857 cur_idx = vq->last_avail_idx; 858 859 if (rxvq_is_mergeable(dev)) 860 max_tries = vq->size - 1; 861 else 862 max_tries = 1; 863 864 while (size > 0) { 865 if (unlikely(cur_idx == avail_head)) 866 return -1; 867 /* 868 * if we tried all available ring items, and still 869 * can't get enough buf, it means something abnormal 870 * happened. 871 */ 872 if (unlikely(++tries > max_tries)) 873 return -1; 874 875 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 876 &vec_idx, buf_vec, 877 &head_idx, &len, 878 VHOST_ACCESS_RW) < 0)) 879 return -1; 880 len = RTE_MIN(len, size); 881 update_shadow_used_ring_split(vq, head_idx, len); 882 size -= len; 883 884 cur_idx++; 885 *num_buffers += 1; 886 } 887 888 *nr_vec = vec_idx; 889 890 return 0; 891 } 892 893 static __rte_always_inline int 894 fill_vec_buf_packed_indirect(struct virtio_net *dev, 895 struct vhost_virtqueue *vq, 896 struct vring_packed_desc *desc, uint16_t *vec_idx, 897 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 898 { 899 uint16_t i; 900 uint32_t nr_descs; 901 uint16_t vec_id = *vec_idx; 902 uint64_t dlen; 903 struct vring_packed_desc *descs, *idescs = NULL; 904 905 dlen = desc->len; 906 descs = (struct vring_packed_desc *)(uintptr_t) 907 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 908 if (unlikely(!descs)) 909 return -1; 910 911 if (unlikely(dlen < desc->len)) { 912 /* 913 * The indirect desc table is not contiguous 914 * in process VA space, we have to copy it. 
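		 * vhost_alloc_copy_ind_table() linearizes the table into a
		 * temporary buffer; every exit path below must release it
		 * with free_ind_table().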
		 */
		idescs = vhost_alloc_copy_ind_table(dev,
			vq, desc->addr, desc->len);
		if (unlikely(!idescs))
			return -1;

		descs = idescs;
	}

	nr_descs = desc->len / sizeof(struct vring_packed_desc);
	if (unlikely(nr_descs >= vq->size)) {
		free_ind_table(idescs);
		return -1;
	}

	for (i = 0; i < nr_descs; i++) {
		if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
			free_ind_table(idescs);
			return -1;
		}

		dlen = descs[i].len;
		*len += dlen;
		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
					descs[i].addr, dlen,
					perm)))
			return -1;
	}
	*vec_idx = vec_id;

	if (unlikely(!!idescs))
		free_ind_table(idescs);

	return 0;
}

static __rte_always_inline int
fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint16_t avail_idx, uint16_t *desc_count,
				struct buf_vector *buf_vec, uint16_t *vec_idx,
				uint16_t *buf_id, uint32_t *len, uint8_t perm)
{
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t vec_id = *vec_idx;
	uint64_t dlen;

	if (avail_idx < vq->last_avail_idx)
		wrap_counter ^= 1;

	/*
	 * Perform a load-acquire barrier in desc_is_avail to
	 * enforce the ordering between desc flags and desc
	 * content.
	 */
	if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
		return -1;

	*desc_count = 0;
	*len = 0;

	while (1) {
		if (unlikely(vec_id >= BUF_VECTOR_MAX))
			return -1;

		if (unlikely(*desc_count >= vq->size))
			return -1;

		*desc_count += 1;
		*buf_id = descs[avail_idx].id;

		if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
			if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
							&descs[avail_idx],
							&vec_id, buf_vec,
							len, perm) < 0))
				return -1;
		} else {
			dlen = descs[avail_idx].len;
			*len += dlen;

			if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
						descs[avail_idx].addr,
						dlen,
						perm)))
				return -1;
		}

		if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
			break;

		if (++avail_idx >= vq->size) {
			avail_idx -= vq->size;
			wrap_counter ^= 1;
		}
	}

	*vec_idx = vec_id;

	return 0;
}

static __rte_noinline void
copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec,
		struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	uint64_t len;
	uint64_t remain = dev->vhost_hlen;
	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
	uint64_t iova = buf_vec->buf_iova;

	while (remain) {
		len = RTE_MIN(remain,
				buf_vec->buf_len);
		dst = buf_vec->buf_addr;
		rte_memcpy((void *)(uintptr_t)dst,
				(void *)(uintptr_t)src,
				len);

		PRINT_PACKET(dev, (uintptr_t)dst,
				(uint32_t)len, 0);
		vhost_log_cache_write_iova(dev, vq,
				iova, len);

		remain -= len;
		iova += len;
		src += len;
		buf_vec++;
	}
}

static __rte_always_inline int
async_iter_initialize(struct virtio_net *dev, struct vhost_async *async)
{
	struct vhost_iov_iter *iter;

	if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n");
		return -1;
	}

	iter = async->iov_iter + async->iter_idx;
	iter->iov = async->iovec + async->iovec_idx;
	iter->nr_segs = 0;

	return 0;
}

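/*
 * Illustrative sketch (not compiled into any path): the iterator helpers
 * here are meant to be driven with one iterator per packet and one iovec
 * per contiguous copy segment, mirroring what mbuf_to_desc() does on the
 * async enqueue path:
 *
 *	if (async_iter_initialize(dev, async))
 *		return -1;
 *	for each contiguous segment:
 *		if (async_iter_add_iovec(dev, async, src, dst, len))
 *			goto error;	// async_iter_cancel() drops the iovecs
 *	async_iter_finalize(async);	// iterator is ready for DMA submission
 *
 * async_iter_reset() rewinds both iter_idx and iovec_idx once the whole
 * burst has been handed over (or abandoned).
 */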
static __rte_always_inline int 1065 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1066 void *src, void *dst, size_t len) 1067 { 1068 struct vhost_iov_iter *iter; 1069 struct vhost_iovec *iovec; 1070 1071 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1072 static bool vhost_max_async_vec_log; 1073 1074 if (!vhost_max_async_vec_log) { 1075 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1076 vhost_max_async_vec_log = true; 1077 } 1078 1079 return -1; 1080 } 1081 1082 iter = async->iov_iter + async->iter_idx; 1083 iovec = async->iovec + async->iovec_idx; 1084 1085 iovec->src_addr = src; 1086 iovec->dst_addr = dst; 1087 iovec->len = len; 1088 1089 iter->nr_segs++; 1090 async->iovec_idx++; 1091 1092 return 0; 1093 } 1094 1095 static __rte_always_inline void 1096 async_iter_finalize(struct vhost_async *async) 1097 { 1098 async->iter_idx++; 1099 } 1100 1101 static __rte_always_inline void 1102 async_iter_cancel(struct vhost_async *async) 1103 { 1104 struct vhost_iov_iter *iter; 1105 1106 iter = async->iov_iter + async->iter_idx; 1107 async->iovec_idx -= iter->nr_segs; 1108 iter->nr_segs = 0; 1109 iter->iov = NULL; 1110 } 1111 1112 static __rte_always_inline void 1113 async_iter_reset(struct vhost_async *async) 1114 { 1115 async->iter_idx = 0; 1116 async->iovec_idx = 0; 1117 } 1118 1119 static __rte_always_inline int 1120 async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1121 struct rte_mbuf *m, uint32_t mbuf_offset, 1122 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1123 { 1124 struct vhost_async *async = vq->async; 1125 uint64_t mapped_len; 1126 uint32_t buf_offset = 0; 1127 void *src, *dst; 1128 void *host_iova; 1129 1130 while (cpy_len) { 1131 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1132 buf_iova + buf_offset, cpy_len, &mapped_len); 1133 if (unlikely(!host_iova)) { 1134 VHOST_LOG_DATA(dev->ifname, ERR, 1135 "%s: failed to get host iova.\n", 1136 __func__); 1137 return -1; 1138 } 1139 1140 if (to_desc) { 1141 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1142 dst = host_iova; 1143 } else { 1144 src = host_iova; 1145 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1146 } 1147 1148 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1149 return -1; 1150 1151 cpy_len -= (uint32_t)mapped_len; 1152 mbuf_offset += (uint32_t)mapped_len; 1153 buf_offset += (uint32_t)mapped_len; 1154 } 1155 1156 return 0; 1157 } 1158 1159 static __rte_always_inline void 1160 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1161 struct rte_mbuf *m, uint32_t mbuf_offset, 1162 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1163 { 1164 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1165 1166 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1167 if (to_desc) { 1168 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1169 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1170 cpy_len); 1171 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1172 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1173 } else { 1174 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1175 (void *)((uintptr_t)(buf_addr)), 1176 cpy_len); 1177 } 1178 } else { 1179 if (to_desc) { 1180 batch_copy[vq->batch_copy_nb_elems].dst = 1181 (void *)((uintptr_t)(buf_addr)); 1182 batch_copy[vq->batch_copy_nb_elems].src = 1183 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1184 
batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1185 } else { 1186 batch_copy[vq->batch_copy_nb_elems].dst = 1187 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1188 batch_copy[vq->batch_copy_nb_elems].src = 1189 (void *)((uintptr_t)(buf_addr)); 1190 } 1191 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1192 vq->batch_copy_nb_elems++; 1193 } 1194 } 1195 1196 static __rte_always_inline int 1197 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1198 struct rte_mbuf *m, struct buf_vector *buf_vec, 1199 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1200 { 1201 uint32_t vec_idx = 0; 1202 uint32_t mbuf_offset, mbuf_avail; 1203 uint32_t buf_offset, buf_avail; 1204 uint64_t buf_addr, buf_iova, buf_len; 1205 uint32_t cpy_len; 1206 uint64_t hdr_addr; 1207 struct rte_mbuf *hdr_mbuf; 1208 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1209 struct vhost_async *async = vq->async; 1210 1211 if (unlikely(m == NULL)) 1212 return -1; 1213 1214 buf_addr = buf_vec[vec_idx].buf_addr; 1215 buf_iova = buf_vec[vec_idx].buf_iova; 1216 buf_len = buf_vec[vec_idx].buf_len; 1217 1218 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1219 return -1; 1220 1221 hdr_mbuf = m; 1222 hdr_addr = buf_addr; 1223 if (unlikely(buf_len < dev->vhost_hlen)) { 1224 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1225 hdr = &tmp_hdr; 1226 } else 1227 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1228 1229 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers); 1230 1231 if (unlikely(buf_len < dev->vhost_hlen)) { 1232 buf_offset = dev->vhost_hlen - buf_len; 1233 vec_idx++; 1234 buf_addr = buf_vec[vec_idx].buf_addr; 1235 buf_iova = buf_vec[vec_idx].buf_iova; 1236 buf_len = buf_vec[vec_idx].buf_len; 1237 buf_avail = buf_len - buf_offset; 1238 } else { 1239 buf_offset = dev->vhost_hlen; 1240 buf_avail = buf_len - dev->vhost_hlen; 1241 } 1242 1243 mbuf_avail = rte_pktmbuf_data_len(m); 1244 mbuf_offset = 0; 1245 1246 if (is_async) { 1247 if (async_iter_initialize(dev, async)) 1248 return -1; 1249 } 1250 1251 while (mbuf_avail != 0 || m->next != NULL) { 1252 /* done with current buf, get the next one */ 1253 if (buf_avail == 0) { 1254 vec_idx++; 1255 if (unlikely(vec_idx >= nr_vec)) 1256 goto error; 1257 1258 buf_addr = buf_vec[vec_idx].buf_addr; 1259 buf_iova = buf_vec[vec_idx].buf_iova; 1260 buf_len = buf_vec[vec_idx].buf_len; 1261 1262 buf_offset = 0; 1263 buf_avail = buf_len; 1264 } 1265 1266 /* done with current mbuf, get the next one */ 1267 if (mbuf_avail == 0) { 1268 m = m->next; 1269 1270 mbuf_offset = 0; 1271 mbuf_avail = rte_pktmbuf_data_len(m); 1272 } 1273 1274 if (hdr_addr) { 1275 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1276 if (rxvq_is_mergeable(dev)) 1277 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1278 num_buffers); 1279 1280 if (unlikely(hdr == &tmp_hdr)) { 1281 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1282 } else { 1283 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1284 dev->vhost_hlen, 0); 1285 vhost_log_cache_write_iova(dev, vq, 1286 buf_vec[0].buf_iova, 1287 dev->vhost_hlen); 1288 } 1289 1290 hdr_addr = 0; 1291 } 1292 1293 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1294 1295 if (is_async) { 1296 if (async_fill_seg(dev, vq, m, mbuf_offset, 1297 buf_iova + buf_offset, cpy_len, true) < 0) 1298 goto error; 1299 } else { 1300 sync_fill_seg(dev, vq, m, mbuf_offset, 1301 buf_addr + buf_offset, 1302 buf_iova + buf_offset, cpy_len, true); 1303 } 1304 1305 mbuf_avail -= cpy_len; 1306 mbuf_offset += cpy_len; 1307 buf_avail -= 
cpy_len;
		buf_offset += cpy_len;
	}

	if (is_async)
		async_iter_finalize(async);

	return 0;
error:
	if (is_async)
		async_iter_cancel(async);

	return -1;
}

static __rte_always_inline int
vhost_enqueue_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt,
			    struct buf_vector *buf_vec,
			    uint16_t *nr_descs)
{
	uint16_t nr_vec = 0;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t max_tries, tries = 0;
	uint16_t buf_id = 0;
	uint32_t len = 0;
	uint16_t desc_count;
	uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint16_t num_buffers = 0;
	uint32_t buffer_len[vq->size];
	uint16_t buffer_buf_id[vq->size];
	uint16_t buffer_desc_count[vq->size];

	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;
	else
		max_tries = 1;

	while (size > 0) {
		/*
		 * If we have tried all of the available ring items and
		 * still cannot get enough buffers, something abnormal
		 * has happened.
		 */
		if (unlikely(++tries > max_tries))
			return -1;

		if (unlikely(fill_vec_buf_packed(dev, vq,
						avail_idx, &desc_count,
						buf_vec, &nr_vec,
						&buf_id, &len,
						VHOST_ACCESS_RW) < 0))
			return -1;

		len = RTE_MIN(len, size);
		size -= len;

		buffer_len[num_buffers] = len;
		buffer_buf_id[num_buffers] = buf_id;
		buffer_desc_count[num_buffers] = desc_count;
		num_buffers += 1;

		*nr_descs += desc_count;
		avail_idx += desc_count;
		if (avail_idx >= vq->size)
			avail_idx -= vq->size;
	}

	if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0)
		return -1;

	vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
					buffer_desc_count, num_buffers);

	return 0;
}

static __rte_noinline uint32_t
virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
	struct rte_mbuf **pkts, uint32_t count)
{
	uint32_t pkt_idx = 0;
	uint16_t num_buffers;
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t avail_head;

	/*
	 * The ordering between avail index and
	 * desc reads needs to be enforced.
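	 * The load-acquire of avail->idx below pairs with the
	 * guest's store-release of that index, so any descriptor
	 * slot with an index below avail_head is safe to read.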
1397 */ 1398 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1399 1400 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1401 1402 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1403 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1404 uint16_t nr_vec = 0; 1405 1406 if (unlikely(reserve_avail_buf_split(dev, vq, 1407 pkt_len, buf_vec, &num_buffers, 1408 avail_head, &nr_vec) < 0)) { 1409 VHOST_LOG_DATA(dev->ifname, DEBUG, 1410 "failed to get enough desc from vring\n"); 1411 vq->shadow_used_idx -= num_buffers; 1412 break; 1413 } 1414 1415 VHOST_LOG_DATA(dev->ifname, DEBUG, 1416 "current index %d | end index %d\n", 1417 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1418 1419 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1420 num_buffers, false) < 0) { 1421 vq->shadow_used_idx -= num_buffers; 1422 break; 1423 } 1424 1425 vq->last_avail_idx += num_buffers; 1426 } 1427 1428 do_data_copy_enqueue(dev, vq); 1429 1430 if (likely(vq->shadow_used_idx)) { 1431 flush_shadow_used_ring_split(dev, vq); 1432 vhost_vring_call_split(dev, vq); 1433 } 1434 1435 return pkt_idx; 1436 } 1437 1438 static __rte_always_inline int 1439 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1440 struct vhost_virtqueue *vq, 1441 struct rte_mbuf **pkts, 1442 uint64_t *desc_addrs, 1443 uint64_t *lens) 1444 { 1445 bool wrap_counter = vq->avail_wrap_counter; 1446 struct vring_packed_desc *descs = vq->desc_packed; 1447 uint16_t avail_idx = vq->last_avail_idx; 1448 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1449 uint16_t i; 1450 1451 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1452 return -1; 1453 1454 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1455 return -1; 1456 1457 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1458 if (unlikely(pkts[i]->next != NULL)) 1459 return -1; 1460 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1461 wrap_counter))) 1462 return -1; 1463 } 1464 1465 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1466 lens[i] = descs[avail_idx + i].len; 1467 1468 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1469 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1470 return -1; 1471 } 1472 1473 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1474 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1475 descs[avail_idx + i].addr, 1476 &lens[i], 1477 VHOST_ACCESS_RW); 1478 1479 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1480 if (unlikely(!desc_addrs[i])) 1481 return -1; 1482 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1483 return -1; 1484 } 1485 1486 return 0; 1487 } 1488 1489 static __rte_always_inline int 1490 virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq, 1491 struct rte_mbuf **pkts, 1492 uint64_t *desc_addrs, 1493 uint64_t *lens, 1494 int16_t dma_id, 1495 uint16_t vchan_id) 1496 { 1497 bool wrap_counter = vq->avail_wrap_counter; 1498 struct vring_packed_desc *descs = vq->desc_packed; 1499 uint16_t avail_idx = vq->last_avail_idx; 1500 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1501 uint16_t i; 1502 1503 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1504 return -1; 1505 1506 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1507 return -1; 1508 1509 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1510 if (unlikely(pkts[i]->next != NULL)) 1511 return -1; 1512 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1513 wrap_counter))) 1514 return -1; 1515 } 1516 1517 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1518 lens[i] = 
descs[avail_idx + i].len; 1519 1520 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1521 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1522 return -1; 1523 } 1524 1525 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1526 desc_addrs[i] = descs[avail_idx + i].addr; 1527 1528 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1529 if (unlikely(!desc_addrs[i])) 1530 return -1; 1531 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1532 return -1; 1533 } 1534 1535 if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) 1536 return -1; 1537 1538 return 0; 1539 } 1540 1541 static __rte_always_inline void 1542 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1543 struct vhost_virtqueue *vq, 1544 struct rte_mbuf **pkts, 1545 uint64_t *desc_addrs, 1546 uint64_t *lens) 1547 { 1548 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1549 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1550 struct vring_packed_desc *descs = vq->desc_packed; 1551 uint16_t avail_idx = vq->last_avail_idx; 1552 uint16_t ids[PACKED_BATCH_SIZE]; 1553 uint16_t i; 1554 1555 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1556 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1557 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1558 (uintptr_t)desc_addrs[i]; 1559 lens[i] = pkts[i]->pkt_len + 1560 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1561 } 1562 1563 if (rxvq_is_mergeable(dev)) { 1564 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1565 ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1); 1566 } 1567 } 1568 1569 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1570 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1571 1572 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1573 1574 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1575 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1576 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1577 pkts[i]->pkt_len); 1578 } 1579 1580 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1581 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1582 lens[i]); 1583 1584 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1585 ids[i] = descs[avail_idx + i].id; 1586 1587 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1588 } 1589 1590 static __rte_always_inline int 1591 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1592 struct vhost_virtqueue *vq, 1593 struct rte_mbuf **pkts) 1594 { 1595 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1596 uint64_t lens[PACKED_BATCH_SIZE]; 1597 1598 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1599 return -1; 1600 1601 if (vq->shadow_used_idx) { 1602 do_data_copy_enqueue(dev, vq); 1603 vhost_flush_enqueue_shadow_packed(dev, vq); 1604 } 1605 1606 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1607 1608 return 0; 1609 } 1610 1611 static __rte_always_inline int16_t 1612 virtio_dev_rx_single_packed(struct virtio_net *dev, 1613 struct vhost_virtqueue *vq, 1614 struct rte_mbuf *pkt) 1615 { 1616 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1617 uint16_t nr_descs = 0; 1618 1619 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1620 &nr_descs) < 0)) { 1621 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1622 return -1; 1623 } 1624 1625 VHOST_LOG_DATA(dev->ifname, DEBUG, 1626 "current index %d | end index %d\n", 1627 vq->last_avail_idx, vq->last_avail_idx + nr_descs); 1628 1629 vq_inc_last_avail_packed(vq, nr_descs); 1630 1631 return 0; 1632 } 1633 1634 static __rte_noinline uint32_t 1635 
virtio_dev_rx_packed(struct virtio_net *dev, 1636 struct vhost_virtqueue *__rte_restrict vq, 1637 struct rte_mbuf **__rte_restrict pkts, 1638 uint32_t count) 1639 { 1640 uint32_t pkt_idx = 0; 1641 1642 do { 1643 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1644 1645 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1646 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1647 &pkts[pkt_idx])) { 1648 pkt_idx += PACKED_BATCH_SIZE; 1649 continue; 1650 } 1651 } 1652 1653 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1654 break; 1655 pkt_idx++; 1656 1657 } while (pkt_idx < count); 1658 1659 if (vq->shadow_used_idx) { 1660 do_data_copy_enqueue(dev, vq); 1661 vhost_flush_enqueue_shadow_packed(dev, vq); 1662 } 1663 1664 if (pkt_idx) 1665 vhost_vring_call_packed(dev, vq); 1666 1667 return pkt_idx; 1668 } 1669 1670 static __rte_always_inline uint32_t 1671 virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, 1672 struct rte_mbuf **pkts, uint32_t count) 1673 { 1674 uint32_t nb_tx = 0; 1675 1676 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 1677 rte_spinlock_lock(&vq->access_lock); 1678 1679 if (unlikely(!vq->enabled)) 1680 goto out_access_unlock; 1681 1682 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1683 vhost_user_iotlb_rd_lock(vq); 1684 1685 if (unlikely(!vq->access_ok)) 1686 if (unlikely(vring_translate(dev, vq) < 0)) 1687 goto out; 1688 1689 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1690 if (count == 0) 1691 goto out; 1692 1693 if (vq_is_packed(dev)) 1694 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1695 else 1696 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1697 1698 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1699 1700 out: 1701 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1702 vhost_user_iotlb_rd_unlock(vq); 1703 1704 out_access_unlock: 1705 rte_spinlock_unlock(&vq->access_lock); 1706 1707 return nb_tx; 1708 } 1709 1710 uint16_t 1711 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1712 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1713 { 1714 struct virtio_net *dev = get_device(vid); 1715 1716 if (!dev) 1717 return 0; 1718 1719 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1720 VHOST_LOG_DATA(dev->ifname, ERR, 1721 "%s: built-in vhost net backend is disabled.\n", 1722 __func__); 1723 return 0; 1724 } 1725 1726 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1727 VHOST_LOG_DATA(dev->ifname, ERR, 1728 "%s: invalid virtqueue idx %d.\n", 1729 __func__, queue_id); 1730 return 0; 1731 } 1732 1733 return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); 1734 } 1735 1736 static __rte_always_inline uint16_t 1737 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1738 { 1739 struct vhost_async *async = vq->async; 1740 1741 if (async->pkts_idx >= async->pkts_inflight_n) 1742 return async->pkts_idx - async->pkts_inflight_n; 1743 else 1744 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1745 } 1746 1747 static __rte_always_inline void 1748 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1749 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1750 { 1751 size_t elem_size = sizeof(struct vring_used_elem); 1752 1753 if (d_idx + count <= ring_size) { 1754 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1755 } else { 1756 uint16_t size = ring_size - d_idx; 1757 1758 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1759 rte_memcpy(d_ring, s_ring + s_idx + size, (count - 
size) * elem_size); 1760 } 1761 } 1762 1763 static __rte_noinline uint32_t 1764 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1765 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1766 { 1767 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1768 uint32_t pkt_idx = 0; 1769 uint16_t num_buffers; 1770 uint16_t avail_head; 1771 1772 struct vhost_async *async = vq->async; 1773 struct async_inflight_info *pkts_info = async->pkts_info; 1774 uint32_t pkt_err = 0; 1775 uint16_t n_xfer; 1776 uint16_t slot_idx = 0; 1777 1778 /* 1779 * The ordering between avail index and desc reads need to be enforced. 1780 */ 1781 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1782 1783 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1784 1785 async_iter_reset(async); 1786 1787 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1788 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1789 uint16_t nr_vec = 0; 1790 1791 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1792 &num_buffers, avail_head, &nr_vec) < 0)) { 1793 VHOST_LOG_DATA(dev->ifname, DEBUG, 1794 "failed to get enough desc from vring\n"); 1795 vq->shadow_used_idx -= num_buffers; 1796 break; 1797 } 1798 1799 VHOST_LOG_DATA(dev->ifname, DEBUG, 1800 "current index %d | end index %d\n", 1801 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1802 1803 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1804 vq->shadow_used_idx -= num_buffers; 1805 break; 1806 } 1807 1808 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1809 pkts_info[slot_idx].descs = num_buffers; 1810 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1811 1812 vq->last_avail_idx += num_buffers; 1813 } 1814 1815 if (unlikely(pkt_idx == 0)) 1816 return 0; 1817 1818 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1819 async->iov_iter, pkt_idx); 1820 1821 pkt_err = pkt_idx - n_xfer; 1822 if (unlikely(pkt_err)) { 1823 uint16_t num_descs = 0; 1824 1825 VHOST_LOG_DATA(dev->ifname, DEBUG, 1826 "%s: failed to transfer %u packets for queue %u.\n", 1827 __func__, pkt_err, vq->index); 1828 1829 /* update number of completed packets */ 1830 pkt_idx = n_xfer; 1831 1832 /* calculate the sum of descriptors to revert */ 1833 while (pkt_err-- > 0) { 1834 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1835 slot_idx--; 1836 } 1837 1838 /* recover shadow used ring and available ring */ 1839 vq->shadow_used_idx -= num_descs; 1840 vq->last_avail_idx -= num_descs; 1841 } 1842 1843 /* keep used descriptors */ 1844 if (likely(vq->shadow_used_idx)) { 1845 uint16_t to = async->desc_idx_split & (vq->size - 1); 1846 1847 store_dma_desc_info_split(vq->shadow_used_split, 1848 async->descs_split, vq->size, 0, to, 1849 vq->shadow_used_idx); 1850 1851 async->desc_idx_split += vq->shadow_used_idx; 1852 1853 async->pkts_idx += pkt_idx; 1854 if (async->pkts_idx >= vq->size) 1855 async->pkts_idx -= vq->size; 1856 1857 async->pkts_inflight_n += pkt_idx; 1858 vq->shadow_used_idx = 0; 1859 } 1860 1861 return pkt_idx; 1862 } 1863 1864 1865 static __rte_always_inline int 1866 vhost_enqueue_async_packed(struct virtio_net *dev, 1867 struct vhost_virtqueue *vq, 1868 struct rte_mbuf *pkt, 1869 struct buf_vector *buf_vec, 1870 uint16_t *nr_descs, 1871 uint16_t *nr_buffers) 1872 { 1873 uint16_t nr_vec = 0; 1874 uint16_t avail_idx = vq->last_avail_idx; 1875 uint16_t max_tries, tries = 0; 1876 uint16_t buf_id = 0; 1877 uint32_t len = 0; 1878 
uint16_t desc_count = 0; 1879 uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1880 uint32_t buffer_len[vq->size]; 1881 uint16_t buffer_buf_id[vq->size]; 1882 uint16_t buffer_desc_count[vq->size]; 1883 1884 if (rxvq_is_mergeable(dev)) 1885 max_tries = vq->size - 1; 1886 else 1887 max_tries = 1; 1888 1889 while (size > 0) { 1890 /* 1891 * if we tried all available ring items, and still 1892 * can't get enough buf, it means something abnormal 1893 * happened. 1894 */ 1895 if (unlikely(++tries > max_tries)) 1896 return -1; 1897 1898 if (unlikely(fill_vec_buf_packed(dev, vq, 1899 avail_idx, &desc_count, 1900 buf_vec, &nr_vec, 1901 &buf_id, &len, 1902 VHOST_ACCESS_RW) < 0)) 1903 return -1; 1904 1905 len = RTE_MIN(len, size); 1906 size -= len; 1907 1908 buffer_len[*nr_buffers] = len; 1909 buffer_buf_id[*nr_buffers] = buf_id; 1910 buffer_desc_count[*nr_buffers] = desc_count; 1911 *nr_buffers += 1; 1912 *nr_descs += desc_count; 1913 avail_idx += desc_count; 1914 if (avail_idx >= vq->size) 1915 avail_idx -= vq->size; 1916 } 1917 1918 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1919 return -1; 1920 1921 vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, 1922 buffer_desc_count, *nr_buffers); 1923 1924 return 0; 1925 } 1926 1927 static __rte_always_inline int16_t 1928 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1929 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1930 { 1931 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1932 1933 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1934 nr_descs, nr_buffers) < 0)) { 1935 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1936 return -1; 1937 } 1938 1939 VHOST_LOG_DATA(dev->ifname, DEBUG, 1940 "current index %d | end index %d\n", 1941 vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1942 1943 return 0; 1944 } 1945 1946 static __rte_always_inline void 1947 virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev, 1948 struct vhost_virtqueue *vq, 1949 struct rte_mbuf **pkts, 1950 uint64_t *desc_addrs, 1951 uint64_t *lens) 1952 { 1953 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1954 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1955 struct vring_packed_desc *descs = vq->desc_packed; 1956 struct vhost_async *async = vq->async; 1957 uint16_t avail_idx = vq->last_avail_idx; 1958 uint32_t mbuf_offset = 0; 1959 uint16_t ids[PACKED_BATCH_SIZE]; 1960 uint64_t mapped_len[PACKED_BATCH_SIZE]; 1961 void *host_iova[PACKED_BATCH_SIZE]; 1962 uintptr_t desc; 1963 uint16_t i; 1964 1965 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1966 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1967 desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], VHOST_ACCESS_RW); 1968 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc; 1969 lens[i] = pkts[i]->pkt_len + 1970 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1971 } 1972 1973 if (rxvq_is_mergeable(dev)) { 1974 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1975 ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1); 1976 } 1977 } 1978 1979 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1980 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1981 1982 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1983 1984 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1985 host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1986 desc_addrs[i] + buf_offset, lens[i], &mapped_len[i]); 1987 } 1988 1989 
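	/*
	 * The virtio-net headers were written by the CPU just above;
	 * only the packet payload is handed to the DMA engine, one
	 * iovec per packet on this batch path.
	 */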
vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1990 async_iter_initialize(dev, async); 1991 async_iter_add_iovec(dev, async, 1992 (void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset), 1993 host_iova[i], 1994 mapped_len[i]); 1995 async->iter_idx++; 1996 } 1997 1998 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1999 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, lens[i]); 2000 2001 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2002 ids[i] = descs[avail_idx + i].id; 2003 2004 vhost_async_shadow_enqueue_packed_batch(vq, lens, ids); 2005 } 2006 2007 static __rte_always_inline int 2008 virtio_dev_rx_async_packed_batch(struct virtio_net *dev, 2009 struct vhost_virtqueue *vq, 2010 struct rte_mbuf **pkts, 2011 int16_t dma_id, uint16_t vchan_id) 2012 { 2013 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 2014 uint64_t lens[PACKED_BATCH_SIZE]; 2015 2016 if (virtio_dev_rx_async_batch_check(vq, pkts, desc_addrs, lens, dma_id, vchan_id) == -1) 2017 return -1; 2018 2019 virtio_dev_rx_async_packed_batch_enqueue(dev, vq, pkts, desc_addrs, lens); 2020 2021 return 0; 2022 } 2023 2024 static __rte_always_inline void 2025 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 2026 uint32_t nr_err, uint32_t *pkt_idx) 2027 { 2028 uint16_t descs_err = 0; 2029 uint16_t buffers_err = 0; 2030 struct vhost_async *async = vq->async; 2031 struct async_inflight_info *pkts_info = vq->async->pkts_info; 2032 2033 *pkt_idx -= nr_err; 2034 /* calculate the sum of buffers and descs of DMA-error packets. */ 2035 while (nr_err-- > 0) { 2036 descs_err += pkts_info[slot_idx % vq->size].descs; 2037 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 2038 slot_idx--; 2039 } 2040 2041 if (vq->last_avail_idx >= descs_err) { 2042 vq->last_avail_idx -= descs_err; 2043 } else { 2044 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 2045 vq->avail_wrap_counter ^= 1; 2046 } 2047 2048 if (async->buffer_idx_packed >= buffers_err) 2049 async->buffer_idx_packed -= buffers_err; 2050 else 2051 async->buffer_idx_packed = async->buffer_idx_packed + vq->size - buffers_err; 2052 } 2053 2054 static __rte_noinline uint32_t 2055 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2056 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2057 { 2058 uint32_t pkt_idx = 0; 2059 uint16_t n_xfer; 2060 uint16_t num_buffers; 2061 uint16_t num_descs; 2062 2063 struct vhost_async *async = vq->async; 2064 struct async_inflight_info *pkts_info = async->pkts_info; 2065 uint32_t pkt_err = 0; 2066 uint16_t slot_idx = 0; 2067 uint16_t i; 2068 2069 do { 2070 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 2071 2072 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 2073 if (!virtio_dev_rx_async_packed_batch(dev, vq, &pkts[pkt_idx], 2074 dma_id, vchan_id)) { 2075 for (i = 0; i < PACKED_BATCH_SIZE; i++) { 2076 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 2077 pkts_info[slot_idx].descs = 1; 2078 pkts_info[slot_idx].nr_buffers = 1; 2079 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2080 pkt_idx++; 2081 } 2082 continue; 2083 } 2084 } 2085 2086 num_buffers = 0; 2087 num_descs = 0; 2088 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 2089 &num_descs, &num_buffers) < 0)) 2090 break; 2091 2092 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 2093 2094 pkts_info[slot_idx].descs = num_descs; 2095 pkts_info[slot_idx].nr_buffers = num_buffers; 2096 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2097 2098 pkt_idx++; 2099 
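		/*
		 * Single-packet path: advance the avail ring by the
		 * descriptors consumed for this packet.
		 */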
vq_inc_last_avail_packed(vq, num_descs); 2100 } while (pkt_idx < count); 2101 2102 if (unlikely(pkt_idx == 0)) 2103 return 0; 2104 2105 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 2106 async->iov_iter, pkt_idx); 2107 2108 async_iter_reset(async); 2109 2110 pkt_err = pkt_idx - n_xfer; 2111 if (unlikely(pkt_err)) { 2112 VHOST_LOG_DATA(dev->ifname, DEBUG, 2113 "%s: failed to transfer %u packets for queue %u.\n", 2114 __func__, pkt_err, vq->index); 2115 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 2116 } 2117 2118 async->pkts_idx += pkt_idx; 2119 if (async->pkts_idx >= vq->size) 2120 async->pkts_idx -= vq->size; 2121 2122 async->pkts_inflight_n += pkt_idx; 2123 2124 return pkt_idx; 2125 } 2126 2127 static __rte_always_inline void 2128 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 2129 { 2130 struct vhost_async *async = vq->async; 2131 uint16_t nr_left = n_descs; 2132 uint16_t nr_copy; 2133 uint16_t to, from; 2134 2135 do { 2136 from = async->last_desc_idx_split & (vq->size - 1); 2137 nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from; 2138 to = vq->last_used_idx & (vq->size - 1); 2139 2140 if (to + nr_copy <= vq->size) { 2141 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 2142 nr_copy * sizeof(struct vring_used_elem)); 2143 } else { 2144 uint16_t size = vq->size - to; 2145 2146 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 2147 size * sizeof(struct vring_used_elem)); 2148 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 2149 (nr_copy - size) * sizeof(struct vring_used_elem)); 2150 } 2151 2152 async->last_desc_idx_split += nr_copy; 2153 vq->last_used_idx += nr_copy; 2154 nr_left -= nr_copy; 2155 } while (nr_left > 0); 2156 } 2157 2158 static __rte_always_inline void 2159 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 2160 uint16_t n_buffers) 2161 { 2162 struct vhost_async *async = vq->async; 2163 uint16_t from = async->last_buffer_idx_packed; 2164 uint16_t used_idx = vq->last_used_idx; 2165 uint16_t head_idx = vq->last_used_idx; 2166 uint16_t head_flags = 0; 2167 uint16_t i; 2168 2169 /* Split loop in two to save memory barriers */ 2170 for (i = 0; i < n_buffers; i++) { 2171 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 2172 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 2173 2174 used_idx += async->buffers_packed[from].count; 2175 if (used_idx >= vq->size) 2176 used_idx -= vq->size; 2177 2178 from++; 2179 if (from >= vq->size) 2180 from = 0; 2181 } 2182 2183 /* The ordering for storing desc flags needs to be enforced. 
*/ 2184 rte_atomic_thread_fence(__ATOMIC_RELEASE); 2185 2186 from = async->last_buffer_idx_packed; 2187 2188 for (i = 0; i < n_buffers; i++) { 2189 uint16_t flags; 2190 2191 if (async->buffers_packed[from].len) 2192 flags = VRING_DESC_F_WRITE; 2193 else 2194 flags = 0; 2195 2196 if (vq->used_wrap_counter) { 2197 flags |= VRING_DESC_F_USED; 2198 flags |= VRING_DESC_F_AVAIL; 2199 } else { 2200 flags &= ~VRING_DESC_F_USED; 2201 flags &= ~VRING_DESC_F_AVAIL; 2202 } 2203 2204 if (i > 0) { 2205 vq->desc_packed[vq->last_used_idx].flags = flags; 2206 } else { 2207 head_idx = vq->last_used_idx; 2208 head_flags = flags; 2209 } 2210 2211 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2212 2213 from++; 2214 if (from == vq->size) 2215 from = 0; 2216 } 2217 2218 vq->desc_packed[head_idx].flags = head_flags; 2219 async->last_buffer_idx_packed = from; 2220 } 2221 2222 static __rte_always_inline uint16_t 2223 vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2224 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) 2225 { 2226 struct vhost_async *async = vq->async; 2227 struct async_inflight_info *pkts_info = async->pkts_info; 2228 uint16_t nr_cpl_pkts = 0; 2229 uint16_t n_descs = 0, n_buffers = 0; 2230 uint16_t start_idx, from, i; 2231 2232 /* Check completed copies for the given DMA vChannel */ 2233 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2234 2235 start_idx = async_get_first_inflight_pkt_idx(vq); 2236 /** 2237 * Calculate the number of copy completed packets. 2238 * Note that there may be completed packets even if 2239 * no copies are reported done by the given DMA vChannel, 2240 * as it's possible that a virtqueue uses multiple DMA 2241 * vChannels. 
2242 */ 2243 from = start_idx; 2244 while (vq->async->pkts_cmpl_flag[from] && count--) { 2245 vq->async->pkts_cmpl_flag[from] = false; 2246 from++; 2247 if (from >= vq->size) 2248 from -= vq->size; 2249 nr_cpl_pkts++; 2250 } 2251 2252 if (nr_cpl_pkts == 0) 2253 return 0; 2254 2255 for (i = 0; i < nr_cpl_pkts; i++) { 2256 from = (start_idx + i) % vq->size; 2257 /* Only used with packed ring */ 2258 n_buffers += pkts_info[from].nr_buffers; 2259 /* Only used with split ring */ 2260 n_descs += pkts_info[from].descs; 2261 pkts[i] = pkts_info[from].mbuf; 2262 } 2263 2264 async->pkts_inflight_n -= nr_cpl_pkts; 2265 2266 if (likely(vq->enabled && vq->access_ok)) { 2267 if (vq_is_packed(dev)) { 2268 write_back_completed_descs_packed(vq, n_buffers); 2269 vhost_vring_call_packed(dev, vq); 2270 } else { 2271 write_back_completed_descs_split(vq, n_descs); 2272 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE); 2273 vhost_vring_call_split(dev, vq); 2274 } 2275 } else { 2276 if (vq_is_packed(dev)) { 2277 async->last_buffer_idx_packed += n_buffers; 2278 if (async->last_buffer_idx_packed >= vq->size) 2279 async->last_buffer_idx_packed -= vq->size; 2280 } else { 2281 async->last_desc_idx_split += n_descs; 2282 } 2283 } 2284 2285 return nr_cpl_pkts; 2286 } 2287 2288 uint16_t 2289 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2290 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2291 uint16_t vchan_id) 2292 { 2293 struct virtio_net *dev = get_device(vid); 2294 struct vhost_virtqueue *vq; 2295 uint16_t n_pkts_cpl = 0; 2296 2297 if (unlikely(!dev)) 2298 return 0; 2299 2300 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2301 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2302 VHOST_LOG_DATA(dev->ifname, ERR, 2303 "%s: invalid virtqueue idx %d.\n", 2304 __func__, queue_id); 2305 return 0; 2306 } 2307 2308 if (unlikely(!dma_copy_track[dma_id].vchans || 2309 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2310 VHOST_LOG_DATA(dev->ifname, ERR, 2311 "%s: invalid channel %d:%u.\n", 2312 __func__, dma_id, vchan_id); 2313 return 0; 2314 } 2315 2316 vq = dev->virtqueue[queue_id]; 2317 2318 if (!rte_spinlock_trylock(&vq->access_lock)) { 2319 VHOST_LOG_DATA(dev->ifname, DEBUG, 2320 "%s: virtqueue %u is busy.\n", 2321 __func__, queue_id); 2322 return 0; 2323 } 2324 2325 if (unlikely(!vq->async)) { 2326 VHOST_LOG_DATA(dev->ifname, ERR, 2327 "%s: async not registered for virtqueue %d.\n", 2328 __func__, queue_id); 2329 goto out; 2330 } 2331 2332 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); 2333 2334 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2335 vq->stats.inflight_completed += n_pkts_cpl; 2336 2337 out: 2338 rte_spinlock_unlock(&vq->access_lock); 2339 2340 return n_pkts_cpl; 2341 } 2342 2343 uint16_t 2344 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2345 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2346 uint16_t vchan_id) 2347 { 2348 struct virtio_net *dev = get_device(vid); 2349 struct vhost_virtqueue *vq; 2350 uint16_t n_pkts_cpl = 0; 2351 2352 if (!dev) 2353 return 0; 2354 2355 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2356 if (unlikely(queue_id >= dev->nr_vring)) { 2357 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 2358 __func__, queue_id); 2359 return 0; 2360 } 2361 2362 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2363 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2364 __func__, dma_id); 2365 
return 0; 2366 } 2367 2368 vq = dev->virtqueue[queue_id]; 2369 2370 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { 2371 VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access lock taken.\n", 2372 __func__); 2373 return -1; 2374 } 2375 2376 if (unlikely(!vq->async)) { 2377 VHOST_LOG_DATA(dev->ifname, ERR, 2378 "%s: async not registered for virtqueue %d.\n", 2379 __func__, queue_id); 2380 return 0; 2381 } 2382 2383 if (unlikely(!dma_copy_track[dma_id].vchans || 2384 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2385 VHOST_LOG_DATA(dev->ifname, ERR, 2386 "%s: invalid channel %d:%u.\n", 2387 __func__, dma_id, vchan_id); 2388 return 0; 2389 } 2390 2391 if ((queue_id & 1) == 0) 2392 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2393 dma_id, vchan_id); 2394 else 2395 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2396 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2397 2398 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2399 vq->stats.inflight_completed += n_pkts_cpl; 2400 2401 return n_pkts_cpl; 2402 } 2403 2404 uint16_t 2405 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2406 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2407 { 2408 struct virtio_net *dev = get_device(vid); 2409 struct vhost_virtqueue *vq; 2410 uint16_t n_pkts_cpl = 0; 2411 2412 if (!dev) 2413 return 0; 2414 2415 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2416 if (unlikely(queue_id >= dev->nr_vring)) { 2417 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n", 2418 __func__, queue_id); 2419 return 0; 2420 } 2421 2422 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2423 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2424 __func__, dma_id); 2425 return 0; 2426 } 2427 2428 vq = dev->virtqueue[queue_id]; 2429 2430 if (!rte_spinlock_trylock(&vq->access_lock)) { 2431 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", 2432 __func__, queue_id); 2433 return 0; 2434 } 2435 2436 if (unlikely(!vq->async)) { 2437 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n", 2438 __func__, queue_id); 2439 goto out_access_unlock; 2440 } 2441 2442 if (unlikely(!dma_copy_track[dma_id].vchans || 2443 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2444 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 2445 __func__, dma_id, vchan_id); 2446 goto out_access_unlock; 2447 } 2448 2449 if ((queue_id & 1) == 0) 2450 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2451 dma_id, vchan_id); 2452 else 2453 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2454 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2455 2456 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2457 vq->stats.inflight_completed += n_pkts_cpl; 2458 2459 out_access_unlock: 2460 rte_spinlock_unlock(&vq->access_lock); 2461 2462 return n_pkts_cpl; 2463 } 2464 2465 static __rte_always_inline uint32_t 2466 virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, 2467 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2468 { 2469 uint32_t nb_tx = 0; 2470 2471 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2472 2473 if (unlikely(!dma_copy_track[dma_id].vchans || 2474 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2475 VHOST_LOG_DATA(dev->ifname, ERR, 2476 "%s: invalid channel %d:%u.\n", 2477 __func__, dma_id, vchan_id); 2478 return 0; 2479 } 
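	/*
	 * The per-queue access_lock is taken first, then the IOTLB read lock
	 * when VIRTIO_F_IOMMU_PLATFORM is negotiated; ring addresses are
	 * (re)translated via vring_translate() if access_ok is not set.
	 */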
2480 2481 rte_spinlock_lock(&vq->access_lock); 2482 2483 if (unlikely(!vq->enabled || !vq->async)) 2484 goto out_access_unlock; 2485 2486 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2487 vhost_user_iotlb_rd_lock(vq); 2488 2489 if (unlikely(!vq->access_ok)) 2490 if (unlikely(vring_translate(dev, vq) < 0)) 2491 goto out; 2492 2493 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2494 if (count == 0) 2495 goto out; 2496 2497 if (vq_is_packed(dev)) 2498 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, 2499 dma_id, vchan_id); 2500 else 2501 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, 2502 dma_id, vchan_id); 2503 2504 vq->stats.inflight_submitted += nb_tx; 2505 2506 out: 2507 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2508 vhost_user_iotlb_rd_unlock(vq); 2509 2510 out_access_unlock: 2511 rte_spinlock_unlock(&vq->access_lock); 2512 2513 return nb_tx; 2514 } 2515 2516 uint16_t 2517 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2518 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2519 uint16_t vchan_id) 2520 { 2521 struct virtio_net *dev = get_device(vid); 2522 2523 if (!dev) 2524 return 0; 2525 2526 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2527 VHOST_LOG_DATA(dev->ifname, ERR, 2528 "%s: built-in vhost net backend is disabled.\n", 2529 __func__); 2530 return 0; 2531 } 2532 2533 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2534 VHOST_LOG_DATA(dev->ifname, ERR, 2535 "%s: invalid virtqueue idx %d.\n", 2536 __func__, queue_id); 2537 return 0; 2538 } 2539 2540 return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, 2541 dma_id, vchan_id); 2542 } 2543 2544 static inline bool 2545 virtio_net_with_host_offload(struct virtio_net *dev) 2546 { 2547 if (dev->features & 2548 ((1ULL << VIRTIO_NET_F_CSUM) | 2549 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2550 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2551 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2552 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2553 return true; 2554 2555 return false; 2556 } 2557 2558 static int 2559 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2560 { 2561 struct rte_ipv4_hdr *ipv4_hdr; 2562 struct rte_ipv6_hdr *ipv6_hdr; 2563 struct rte_ether_hdr *eth_hdr; 2564 uint16_t ethertype; 2565 uint16_t data_len = rte_pktmbuf_data_len(m); 2566 2567 if (data_len < sizeof(struct rte_ether_hdr)) 2568 return -EINVAL; 2569 2570 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2571 2572 m->l2_len = sizeof(struct rte_ether_hdr); 2573 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2574 2575 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2576 if (data_len < sizeof(struct rte_ether_hdr) + 2577 sizeof(struct rte_vlan_hdr)) 2578 goto error; 2579 2580 struct rte_vlan_hdr *vlan_hdr = 2581 (struct rte_vlan_hdr *)(eth_hdr + 1); 2582 2583 m->l2_len += sizeof(struct rte_vlan_hdr); 2584 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2585 } 2586 2587 switch (ethertype) { 2588 case RTE_ETHER_TYPE_IPV4: 2589 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2590 goto error; 2591 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2592 m->l2_len); 2593 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2594 if (data_len < m->l2_len + m->l3_len) 2595 goto error; 2596 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2597 *l4_proto = ipv4_hdr->next_proto_id; 2598 break; 2599 case RTE_ETHER_TYPE_IPV6: 2600 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2601 goto error; 2602 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2603 m->l2_len); 
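		/*
		 * Only the fixed 40-byte IPv6 header is accounted for here;
		 * extension headers are not walked, so l4_proto is taken
		 * directly from the next-header field.
		 */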
2604 m->l3_len = sizeof(struct rte_ipv6_hdr); 2605 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2606 *l4_proto = ipv6_hdr->proto; 2607 break; 2608 default: 2609 /* a valid L3 header is needed for further L4 parsing */ 2610 goto error; 2611 } 2612 2613 /* both CSUM and GSO need a valid L4 header */ 2614 switch (*l4_proto) { 2615 case IPPROTO_TCP: 2616 if (data_len < m->l2_len + m->l3_len + 2617 sizeof(struct rte_tcp_hdr)) 2618 goto error; 2619 break; 2620 case IPPROTO_UDP: 2621 if (data_len < m->l2_len + m->l3_len + 2622 sizeof(struct rte_udp_hdr)) 2623 goto error; 2624 break; 2625 case IPPROTO_SCTP: 2626 if (data_len < m->l2_len + m->l3_len + 2627 sizeof(struct rte_sctp_hdr)) 2628 goto error; 2629 break; 2630 default: 2631 goto error; 2632 } 2633 2634 return 0; 2635 2636 error: 2637 m->l2_len = 0; 2638 m->l3_len = 0; 2639 m->ol_flags = 0; 2640 return -EINVAL; 2641 } 2642 2643 static __rte_always_inline void 2644 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2645 struct rte_mbuf *m) 2646 { 2647 uint8_t l4_proto = 0; 2648 struct rte_tcp_hdr *tcp_hdr = NULL; 2649 uint16_t tcp_len; 2650 uint16_t data_len = rte_pktmbuf_data_len(m); 2651 2652 if (parse_headers(m, &l4_proto) < 0) 2653 return; 2654 2655 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2656 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2657 switch (hdr->csum_offset) { 2658 case (offsetof(struct rte_tcp_hdr, cksum)): 2659 if (l4_proto != IPPROTO_TCP) 2660 goto error; 2661 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2662 break; 2663 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2664 if (l4_proto != IPPROTO_UDP) 2665 goto error; 2666 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2667 break; 2668 case (offsetof(struct rte_sctp_hdr, cksum)): 2669 if (l4_proto != IPPROTO_SCTP) 2670 goto error; 2671 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2672 break; 2673 default: 2674 goto error; 2675 } 2676 } else { 2677 goto error; 2678 } 2679 } 2680 2681 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2682 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2683 case VIRTIO_NET_HDR_GSO_TCPV4: 2684 case VIRTIO_NET_HDR_GSO_TCPV6: 2685 if (l4_proto != IPPROTO_TCP) 2686 goto error; 2687 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2688 struct rte_tcp_hdr *, 2689 m->l2_len + m->l3_len); 2690 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2691 if (data_len < m->l2_len + m->l3_len + tcp_len) 2692 goto error; 2693 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2694 m->tso_segsz = hdr->gso_size; 2695 m->l4_len = tcp_len; 2696 break; 2697 case VIRTIO_NET_HDR_GSO_UDP: 2698 if (l4_proto != IPPROTO_UDP) 2699 goto error; 2700 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2701 m->tso_segsz = hdr->gso_size; 2702 m->l4_len = sizeof(struct rte_udp_hdr); 2703 break; 2704 default: 2705 VHOST_LOG_DATA(dev->ifname, WARNING, 2706 "unsupported gso type %u.\n", 2707 hdr->gso_type); 2708 goto error; 2709 } 2710 } 2711 return; 2712 2713 error: 2714 m->l2_len = 0; 2715 m->l3_len = 0; 2716 m->ol_flags = 0; 2717 } 2718 2719 static __rte_always_inline void 2720 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2721 struct rte_mbuf *m, bool legacy_ol_flags) 2722 { 2723 struct rte_net_hdr_lens hdr_lens; 2724 int l4_supported = 0; 2725 uint32_t ptype; 2726 2727 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2728 return; 2729 2730 if (legacy_ol_flags) { 2731 vhost_dequeue_offload_legacy(dev, hdr, m); 2732 return; 2733 } 2734 2735 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2736 2737 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2738 
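	/*
	 * The ptype parsed above is used twice below: if the L4 type is
	 * recognized and csum_start falls within the parsed headers, the
	 * checksum is left as-is and flagged RTE_MBUF_F_RX_L4_CKSUM_NONE;
	 * otherwise it is completed in software. It is also checked against
	 * the virtio GSO type before RTE_MBUF_F_RX_LRO is set.
	 */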
m->packet_type = ptype; 2739 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2740 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2741 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2742 l4_supported = 1; 2743 2744 /* According to Virtio 1.1 spec, the device only needs to look at 2745 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2746 * This differs from the processing incoming packets path where the 2747 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2748 * device. 2749 * 2750 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2751 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2752 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2753 * 2754 * 5.1.6.2.2 Device Requirements: Packet Transmission 2755 * The device MUST ignore flag bits that it does not recognize. 2756 */ 2757 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2758 uint32_t hdrlen; 2759 2760 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2761 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2762 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2763 } else { 2764 /* Unknown proto or tunnel, do sw cksum. We can assume 2765 * the cksum field is in the first segment since the 2766 * buffers we provided to the host are large enough. 2767 * In case of SCTP, this will be wrong since it's a CRC 2768 * but there's nothing we can do. 2769 */ 2770 uint16_t csum = 0, off; 2771 2772 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2773 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2774 return; 2775 if (likely(csum != 0xffff)) 2776 csum = ~csum; 2777 off = hdr->csum_offset + hdr->csum_start; 2778 if (rte_pktmbuf_data_len(m) >= off + 1) 2779 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2780 } 2781 } 2782 2783 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2784 if (hdr->gso_size == 0) 2785 return; 2786 2787 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2788 case VIRTIO_NET_HDR_GSO_TCPV4: 2789 case VIRTIO_NET_HDR_GSO_TCPV6: 2790 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2791 break; 2792 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2793 m->tso_segsz = hdr->gso_size; 2794 break; 2795 case VIRTIO_NET_HDR_GSO_UDP: 2796 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2797 break; 2798 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2799 m->tso_segsz = hdr->gso_size; 2800 break; 2801 default: 2802 break; 2803 } 2804 } 2805 } 2806 2807 static __rte_noinline void 2808 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2809 struct buf_vector *buf_vec) 2810 { 2811 uint64_t len; 2812 uint64_t remain = sizeof(struct virtio_net_hdr); 2813 uint64_t src; 2814 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2815 2816 while (remain) { 2817 len = RTE_MIN(remain, buf_vec->buf_len); 2818 src = buf_vec->buf_addr; 2819 rte_memcpy((void *)(uintptr_t)dst, 2820 (void *)(uintptr_t)src, len); 2821 2822 remain -= len; 2823 dst += len; 2824 buf_vec++; 2825 } 2826 } 2827 2828 static __rte_always_inline int 2829 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2830 struct buf_vector *buf_vec, uint16_t nr_vec, 2831 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2832 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2833 { 2834 uint32_t buf_avail, buf_offset, buf_len; 2835 uint64_t buf_addr, buf_iova; 2836 uint32_t mbuf_avail, mbuf_offset; 2837 uint32_t hdr_remain = dev->vhost_hlen; 2838 uint32_t cpy_len; 2839 struct rte_mbuf *cur = m, *prev = m; 2840 struct virtio_net_hdr tmp_hdr; 2841 struct virtio_net_hdr *hdr = 
NULL; 2842 uint16_t vec_idx; 2843 struct vhost_async *async = vq->async; 2844 struct async_inflight_info *pkts_info; 2845 2846 /* 2847 * The caller has checked the descriptors chain is larger than the 2848 * header size. 2849 */ 2850 2851 if (virtio_net_with_host_offload(dev)) { 2852 if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { 2853 /* 2854 * No luck, the virtio-net header doesn't fit 2855 * in a contiguous virtual area. 2856 */ 2857 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2858 hdr = &tmp_hdr; 2859 } else { 2860 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); 2861 } 2862 } 2863 2864 for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { 2865 if (buf_vec[vec_idx].buf_len > hdr_remain) 2866 break; 2867 2868 hdr_remain -= buf_vec[vec_idx].buf_len; 2869 } 2870 2871 buf_addr = buf_vec[vec_idx].buf_addr; 2872 buf_iova = buf_vec[vec_idx].buf_iova; 2873 buf_len = buf_vec[vec_idx].buf_len; 2874 buf_offset = hdr_remain; 2875 buf_avail = buf_vec[vec_idx].buf_len - hdr_remain; 2876 2877 PRINT_PACKET(dev, 2878 (uintptr_t)(buf_addr + buf_offset), 2879 (uint32_t)buf_avail, 0); 2880 2881 mbuf_offset = 0; 2882 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2883 2884 if (is_async) { 2885 pkts_info = async->pkts_info; 2886 if (async_iter_initialize(dev, async)) 2887 return -1; 2888 } 2889 2890 while (1) { 2891 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2892 2893 if (is_async) { 2894 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2895 buf_iova + buf_offset, cpy_len, false) < 0) 2896 goto error; 2897 } else if (likely(hdr && cur == m)) { 2898 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), 2899 (void *)((uintptr_t)(buf_addr + buf_offset)), 2900 cpy_len); 2901 } else { 2902 sync_fill_seg(dev, vq, cur, mbuf_offset, 2903 buf_addr + buf_offset, 2904 buf_iova + buf_offset, cpy_len, false); 2905 } 2906 2907 mbuf_avail -= cpy_len; 2908 mbuf_offset += cpy_len; 2909 buf_avail -= cpy_len; 2910 buf_offset += cpy_len; 2911 2912 /* This buf reaches to its end, get the next one */ 2913 if (buf_avail == 0) { 2914 if (++vec_idx >= nr_vec) 2915 break; 2916 2917 buf_addr = buf_vec[vec_idx].buf_addr; 2918 buf_iova = buf_vec[vec_idx].buf_iova; 2919 buf_len = buf_vec[vec_idx].buf_len; 2920 2921 buf_offset = 0; 2922 buf_avail = buf_len; 2923 2924 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2925 (uint32_t)buf_avail, 0); 2926 } 2927 2928 /* 2929 * This mbuf reaches to its end, get a new one 2930 * to hold more data. 
2931 */ 2932 if (mbuf_avail == 0) { 2933 cur = rte_pktmbuf_alloc(mbuf_pool); 2934 if (unlikely(cur == NULL)) { 2935 VHOST_LOG_DATA(dev->ifname, ERR, 2936 "failed to allocate memory for mbuf.\n"); 2937 goto error; 2938 } 2939 2940 prev->next = cur; 2941 prev->data_len = mbuf_offset; 2942 m->nb_segs += 1; 2943 m->pkt_len += mbuf_offset; 2944 prev = cur; 2945 2946 mbuf_offset = 0; 2947 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2948 } 2949 } 2950 2951 prev->data_len = mbuf_offset; 2952 m->pkt_len += mbuf_offset; 2953 2954 if (is_async) { 2955 async_iter_finalize(async); 2956 if (hdr) 2957 pkts_info[slot_idx].nethdr = *hdr; 2958 } else if (hdr) { 2959 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 2960 } 2961 2962 return 0; 2963 error: 2964 if (is_async) 2965 async_iter_cancel(async); 2966 2967 return -1; 2968 } 2969 2970 static void 2971 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2972 { 2973 rte_free(opaque); 2974 } 2975 2976 static int 2977 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 2978 { 2979 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2980 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2981 uint16_t buf_len; 2982 rte_iova_t iova; 2983 void *buf; 2984 2985 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2986 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2987 2988 if (unlikely(total_len > UINT16_MAX)) 2989 return -ENOSPC; 2990 2991 buf_len = total_len; 2992 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2993 if (unlikely(buf == NULL)) 2994 return -ENOMEM; 2995 2996 /* Initialize shinfo */ 2997 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2998 virtio_dev_extbuf_free, buf); 2999 if (unlikely(shinfo == NULL)) { 3000 rte_free(buf); 3001 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n"); 3002 return -1; 3003 } 3004 3005 iova = rte_malloc_virt2iova(buf); 3006 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 3007 rte_pktmbuf_reset_headroom(pkt); 3008 3009 return 0; 3010 } 3011 3012 /* 3013 * Prepare a host supported pktmbuf. 3014 */ 3015 static __rte_always_inline int 3016 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 3017 uint32_t data_len) 3018 { 3019 if (rte_pktmbuf_tailroom(pkt) >= data_len) 3020 return 0; 3021 3022 /* attach an external buffer if supported */ 3023 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 3024 return 0; 3025 3026 /* check if chained buffers are allowed */ 3027 if (!dev->linearbuf) 3028 return 0; 3029 3030 return -1; 3031 } 3032 3033 __rte_always_inline 3034 static uint16_t 3035 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3036 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3037 bool legacy_ol_flags) 3038 { 3039 uint16_t i; 3040 uint16_t avail_entries; 3041 uint16_t dropped = 0; 3042 static bool allocerr_warned; 3043 3044 /* 3045 * The ordering between avail index and 3046 * desc reads needs to be enforced. 
3047 */ 3048 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3049 vq->last_avail_idx; 3050 if (avail_entries == 0) 3051 return 0; 3052 3053 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3054 3055 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 3056 3057 count = RTE_MIN(count, MAX_PKT_BURST); 3058 count = RTE_MIN(count, avail_entries); 3059 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3060 3061 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3062 return 0; 3063 3064 for (i = 0; i < count; i++) { 3065 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3066 uint16_t head_idx; 3067 uint32_t buf_len; 3068 uint16_t nr_vec = 0; 3069 int err; 3070 3071 if (unlikely(fill_vec_buf_split(dev, vq, 3072 vq->last_avail_idx + i, 3073 &nr_vec, buf_vec, 3074 &head_idx, &buf_len, 3075 VHOST_ACCESS_RO) < 0)) 3076 break; 3077 3078 update_shadow_used_ring_split(vq, head_idx, 0); 3079 3080 if (unlikely(buf_len <= dev->vhost_hlen)) { 3081 dropped += 1; 3082 i++; 3083 break; 3084 } 3085 3086 buf_len -= dev->vhost_hlen; 3087 3088 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 3089 if (unlikely(err)) { 3090 /* 3091 * mbuf allocation fails for jumbo packets when external 3092 * buffer allocation is not allowed and linear buffer 3093 * is required. Drop this packet. 3094 */ 3095 if (!allocerr_warned) { 3096 VHOST_LOG_DATA(dev->ifname, ERR, 3097 "failed mbuf alloc of size %d from %s.\n", 3098 buf_len, mbuf_pool->name); 3099 allocerr_warned = true; 3100 } 3101 dropped += 1; 3102 i++; 3103 break; 3104 } 3105 3106 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 3107 mbuf_pool, legacy_ol_flags, 0, false); 3108 if (unlikely(err)) { 3109 if (!allocerr_warned) { 3110 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3111 allocerr_warned = true; 3112 } 3113 dropped += 1; 3114 i++; 3115 break; 3116 } 3117 3118 } 3119 3120 if (dropped) 3121 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 3122 3123 vq->last_avail_idx += i; 3124 3125 do_data_copy_dequeue(vq); 3126 if (unlikely(i < count)) 3127 vq->shadow_used_idx = i; 3128 if (likely(vq->shadow_used_idx)) { 3129 flush_shadow_used_ring_split(dev, vq); 3130 vhost_vring_call_split(dev, vq); 3131 } 3132 3133 return (i - dropped); 3134 } 3135 3136 __rte_noinline 3137 static uint16_t 3138 virtio_dev_tx_split_legacy(struct virtio_net *dev, 3139 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3140 struct rte_mbuf **pkts, uint16_t count) 3141 { 3142 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 3143 } 3144 3145 __rte_noinline 3146 static uint16_t 3147 virtio_dev_tx_split_compliant(struct virtio_net *dev, 3148 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3149 struct rte_mbuf **pkts, uint16_t count) 3150 { 3151 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 3152 } 3153 3154 static __rte_always_inline int 3155 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 3156 struct vhost_virtqueue *vq, 3157 struct rte_mbuf **pkts, 3158 uint16_t avail_idx, 3159 uintptr_t *desc_addrs, 3160 uint16_t *ids) 3161 { 3162 bool wrap = vq->avail_wrap_counter; 3163 struct vring_packed_desc *descs = vq->desc_packed; 3164 uint64_t lens[PACKED_BATCH_SIZE]; 3165 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3166 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3167 uint16_t flags, i; 3168 3169 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3170 return -1; 3171 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3172 return 
-1; 3173 3174 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3175 flags = descs[avail_idx + i].flags; 3176 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 3177 (wrap == !!(flags & VRING_DESC_F_USED)) || 3178 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3179 return -1; 3180 } 3181 3182 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3183 3184 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3185 lens[i] = descs[avail_idx + i].len; 3186 3187 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3188 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3189 descs[avail_idx + i].addr, 3190 &lens[i], VHOST_ACCESS_RW); 3191 } 3192 3193 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3194 if (unlikely(!desc_addrs[i])) 3195 return -1; 3196 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3197 return -1; 3198 } 3199 3200 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3201 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3202 goto err; 3203 } 3204 3205 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3206 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3207 3208 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3209 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3210 goto err; 3211 } 3212 3213 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3214 pkts[i]->pkt_len = lens[i] - buf_offset; 3215 pkts[i]->data_len = pkts[i]->pkt_len; 3216 ids[i] = descs[avail_idx + i].id; 3217 } 3218 3219 return 0; 3220 3221 err: 3222 return -1; 3223 } 3224 3225 static __rte_always_inline int 3226 vhost_async_tx_batch_packed_check(struct virtio_net *dev, 3227 struct vhost_virtqueue *vq, 3228 struct rte_mbuf **pkts, 3229 uint16_t avail_idx, 3230 uintptr_t *desc_addrs, 3231 uint64_t *lens, 3232 uint16_t *ids, 3233 int16_t dma_id, 3234 uint16_t vchan_id) 3235 { 3236 bool wrap = vq->avail_wrap_counter; 3237 struct vring_packed_desc *descs = vq->desc_packed; 3238 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3239 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3240 uint16_t flags, i; 3241 3242 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3243 return -1; 3244 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3245 return -1; 3246 3247 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3248 flags = descs[avail_idx + i].flags; 3249 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 3250 (wrap == !!(flags & VRING_DESC_F_USED)) || 3251 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3252 return -1; 3253 } 3254 3255 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3256 3257 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3258 lens[i] = descs[avail_idx + i].len; 3259 3260 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3261 desc_addrs[i] = descs[avail_idx + i].addr; 3262 } 3263 3264 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3265 if (unlikely(!desc_addrs[i])) 3266 return -1; 3267 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3268 return -1; 3269 } 3270 3271 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3272 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3273 goto err; 3274 } 3275 3276 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3277 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3278 3279 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3280 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3281 goto err; 3282 } 3283 3284 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3285 pkts[i]->pkt_len = lens[i] - buf_offset; 3286 pkts[i]->data_len = pkts[i]->pkt_len; 3287 ids[i] = descs[avail_idx + i].id; 3288 } 3289 3290 if 
(rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) 3291 return -1; 3292 3293 return 0; 3294 3295 err: 3296 return -1; 3297 } 3298 3299 static __rte_always_inline int 3300 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3301 struct vhost_virtqueue *vq, 3302 struct rte_mbuf **pkts, 3303 bool legacy_ol_flags) 3304 { 3305 uint16_t avail_idx = vq->last_avail_idx; 3306 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3307 struct virtio_net_hdr *hdr; 3308 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3309 uint16_t ids[PACKED_BATCH_SIZE]; 3310 uint16_t i; 3311 3312 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3313 desc_addrs, ids)) 3314 return -1; 3315 3316 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3317 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3318 3319 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3320 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3321 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3322 pkts[i]->pkt_len); 3323 3324 if (virtio_net_with_host_offload(dev)) { 3325 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3326 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3327 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3328 } 3329 } 3330 3331 if (virtio_net_is_inorder(dev)) 3332 vhost_shadow_dequeue_batch_packed_inorder(vq, 3333 ids[PACKED_BATCH_SIZE - 1]); 3334 else 3335 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3336 3337 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3338 3339 return 0; 3340 } 3341 3342 static __rte_always_inline int 3343 vhost_dequeue_single_packed(struct virtio_net *dev, 3344 struct vhost_virtqueue *vq, 3345 struct rte_mempool *mbuf_pool, 3346 struct rte_mbuf *pkts, 3347 uint16_t *buf_id, 3348 uint16_t *desc_count, 3349 bool legacy_ol_flags) 3350 { 3351 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3352 uint32_t buf_len; 3353 uint16_t nr_vec = 0; 3354 int err; 3355 static bool allocerr_warned; 3356 3357 if (unlikely(fill_vec_buf_packed(dev, vq, 3358 vq->last_avail_idx, desc_count, 3359 buf_vec, &nr_vec, 3360 buf_id, &buf_len, 3361 VHOST_ACCESS_RO) < 0)) 3362 return -1; 3363 3364 if (unlikely(buf_len <= dev->vhost_hlen)) 3365 return -1; 3366 3367 buf_len -= dev->vhost_hlen; 3368 3369 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3370 if (!allocerr_warned) { 3371 VHOST_LOG_DATA(dev->ifname, ERR, 3372 "failed mbuf alloc of size %d from %s.\n", 3373 buf_len, mbuf_pool->name); 3374 allocerr_warned = true; 3375 } 3376 return -1; 3377 } 3378 3379 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3380 mbuf_pool, legacy_ol_flags, 0, false); 3381 if (unlikely(err)) { 3382 if (!allocerr_warned) { 3383 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3384 allocerr_warned = true; 3385 } 3386 return -1; 3387 } 3388 3389 return 0; 3390 } 3391 3392 static __rte_always_inline int 3393 virtio_dev_tx_single_packed(struct virtio_net *dev, 3394 struct vhost_virtqueue *vq, 3395 struct rte_mempool *mbuf_pool, 3396 struct rte_mbuf *pkts, 3397 bool legacy_ol_flags) 3398 { 3399 3400 uint16_t buf_id, desc_count = 0; 3401 int ret; 3402 3403 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3404 &desc_count, legacy_ol_flags); 3405 3406 if (likely(desc_count > 0)) { 3407 if (virtio_net_is_inorder(dev)) 3408 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3409 desc_count); 3410 else 3411 vhost_shadow_dequeue_single_packed(vq, buf_id, 3412 desc_count); 3413 3414 vq_inc_last_avail_packed(vq, desc_count); 3415 } 3416 3417 return ret; 3418 } 
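/*
 * Packed-ring dequeue dispatch: virtio_dev_tx_packed() below first tries the
 * PACKED_BATCH_SIZE fast path (virtio_dev_tx_batch_packed) and falls back to
 * per-descriptor processing via virtio_dev_tx_single_packed(). Mbufs that end
 * up unused are freed, then the shadow used ring is flushed and the guest is
 * notified once the burst is done.
 */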
3419 3420 __rte_always_inline 3421 static uint16_t 3422 virtio_dev_tx_packed(struct virtio_net *dev, 3423 struct vhost_virtqueue *__rte_restrict vq, 3424 struct rte_mempool *mbuf_pool, 3425 struct rte_mbuf **__rte_restrict pkts, 3426 uint32_t count, 3427 bool legacy_ol_flags) 3428 { 3429 uint32_t pkt_idx = 0; 3430 3431 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3432 return 0; 3433 3434 do { 3435 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3436 3437 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3438 if (!virtio_dev_tx_batch_packed(dev, vq, 3439 &pkts[pkt_idx], 3440 legacy_ol_flags)) { 3441 pkt_idx += PACKED_BATCH_SIZE; 3442 continue; 3443 } 3444 } 3445 3446 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3447 pkts[pkt_idx], 3448 legacy_ol_flags)) 3449 break; 3450 pkt_idx++; 3451 } while (pkt_idx < count); 3452 3453 if (pkt_idx != count) 3454 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3455 3456 if (vq->shadow_used_idx) { 3457 do_data_copy_dequeue(vq); 3458 3459 vhost_flush_dequeue_shadow_packed(dev, vq); 3460 vhost_vring_call_packed(dev, vq); 3461 } 3462 3463 return pkt_idx; 3464 } 3465 3466 __rte_noinline 3467 static uint16_t 3468 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3469 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3470 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3471 { 3472 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3473 } 3474 3475 __rte_noinline 3476 static uint16_t 3477 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3478 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3479 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3480 { 3481 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3482 } 3483 3484 uint16_t 3485 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3486 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3487 { 3488 struct virtio_net *dev; 3489 struct rte_mbuf *rarp_mbuf = NULL; 3490 struct vhost_virtqueue *vq; 3491 int16_t success = 1; 3492 3493 dev = get_device(vid); 3494 if (!dev) 3495 return 0; 3496 3497 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3498 VHOST_LOG_DATA(dev->ifname, ERR, 3499 "%s: built-in vhost net backend is disabled.\n", 3500 __func__); 3501 return 0; 3502 } 3503 3504 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3505 VHOST_LOG_DATA(dev->ifname, ERR, 3506 "%s: invalid virtqueue idx %d.\n", 3507 __func__, queue_id); 3508 return 0; 3509 } 3510 3511 vq = dev->virtqueue[queue_id]; 3512 3513 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) 3514 return 0; 3515 3516 if (unlikely(!vq->enabled)) { 3517 count = 0; 3518 goto out_access_unlock; 3519 } 3520 3521 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3522 vhost_user_iotlb_rd_lock(vq); 3523 3524 if (unlikely(!vq->access_ok)) 3525 if (unlikely(vring_translate(dev, vq) < 0)) { 3526 count = 0; 3527 goto out; 3528 } 3529 3530 /* 3531 * Construct a RARP broadcast packet, and inject it to the "pkts" 3532 * array, to looks like that guest actually send such packet. 3533 * 3534 * Check user_send_rarp() for more information. 3535 * 3536 * broadcast_rarp shares a cacheline in the virtio_net structure 3537 * with some fields that are accessed during enqueue and 3538 * __atomic_compare_exchange_n causes a write if performed compare 3539 * and exchange. This could result in false sharing between enqueue 3540 * and dequeue. 
3541 * 3542 * Prevent unnecessary false sharing by reading broadcast_rarp first 3543 * and only performing compare and exchange if the read indicates it 3544 * is likely to be set. 3545 */ 3546 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && 3547 __atomic_compare_exchange_n(&dev->broadcast_rarp, 3548 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { 3549 3550 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 3551 if (rarp_mbuf == NULL) { 3552 VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n"); 3553 count = 0; 3554 goto out; 3555 } 3556 /* 3557 * Inject it to the head of "pkts" array, so that switch's mac 3558 * learning table will get updated first. 3559 */ 3560 pkts[0] = rarp_mbuf; 3561 vhost_queue_stats_update(dev, vq, pkts, 1); 3562 pkts++; 3563 count -= 1; 3564 } 3565 3566 if (vq_is_packed(dev)) { 3567 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3568 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3569 else 3570 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3571 } else { 3572 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3573 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3574 else 3575 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3576 } 3577 3578 vhost_queue_stats_update(dev, vq, pkts, count); 3579 3580 out: 3581 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3582 vhost_user_iotlb_rd_unlock(vq); 3583 3584 out_access_unlock: 3585 rte_spinlock_unlock(&vq->access_lock); 3586 3587 if (unlikely(rarp_mbuf != NULL)) 3588 count += 1; 3589 3590 return count; 3591 } 3592 3593 static __rte_always_inline uint16_t 3594 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3595 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3596 uint16_t vchan_id, bool legacy_ol_flags) 3597 { 3598 uint16_t start_idx, from, i; 3599 uint16_t nr_cpl_pkts = 0; 3600 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3601 3602 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3603 3604 start_idx = async_get_first_inflight_pkt_idx(vq); 3605 3606 from = start_idx; 3607 while (vq->async->pkts_cmpl_flag[from] && count--) { 3608 vq->async->pkts_cmpl_flag[from] = false; 3609 from = (from + 1) % vq->size; 3610 nr_cpl_pkts++; 3611 } 3612 3613 if (nr_cpl_pkts == 0) 3614 return 0; 3615 3616 for (i = 0; i < nr_cpl_pkts; i++) { 3617 from = (start_idx + i) % vq->size; 3618 pkts[i] = pkts_info[from].mbuf; 3619 3620 if (virtio_net_with_host_offload(dev)) 3621 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i], 3622 legacy_ol_flags); 3623 } 3624 3625 /* write back completed descs to used ring and update used idx */ 3626 if (vq_is_packed(dev)) { 3627 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3628 vhost_vring_call_packed(dev, vq); 3629 } else { 3630 write_back_completed_descs_split(vq, nr_cpl_pkts); 3631 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE); 3632 vhost_vring_call_split(dev, vq); 3633 } 3634 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3635 3636 return nr_cpl_pkts; 3637 } 3638 3639 static __rte_always_inline uint16_t 3640 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3641 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3642 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3643 { 3644 static bool allocerr_warned; 3645 bool dropped = false; 3646 uint16_t avail_entries; 3647 uint16_t pkt_idx, 
slot_idx = 0; 3648 uint16_t nr_done_pkts = 0; 3649 uint16_t pkt_err = 0; 3650 uint16_t n_xfer; 3651 struct vhost_async *async = vq->async; 3652 struct async_inflight_info *pkts_info = async->pkts_info; 3653 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3654 uint16_t pkts_size = count; 3655 3656 /** 3657 * The ordering between avail index and 3658 * desc reads needs to be enforced. 3659 */ 3660 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 3661 vq->last_avail_idx; 3662 if (avail_entries == 0) 3663 goto out; 3664 3665 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3666 3667 async_iter_reset(async); 3668 3669 count = RTE_MIN(count, MAX_PKT_BURST); 3670 count = RTE_MIN(count, avail_entries); 3671 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3672 3673 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3674 goto out; 3675 3676 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3677 uint16_t head_idx = 0; 3678 uint16_t nr_vec = 0; 3679 uint16_t to; 3680 uint32_t buf_len; 3681 int err; 3682 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3683 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3684 3685 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3686 &nr_vec, buf_vec, 3687 &head_idx, &buf_len, 3688 VHOST_ACCESS_RO) < 0)) { 3689 dropped = true; 3690 break; 3691 } 3692 3693 if (unlikely(buf_len <= dev->vhost_hlen)) { 3694 dropped = true; 3695 break; 3696 } 3697 3698 buf_len -= dev->vhost_hlen; 3699 3700 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3701 if (unlikely(err)) { 3702 /** 3703 * mbuf allocation fails for jumbo packets when external 3704 * buffer allocation is not allowed and linear buffer 3705 * is required. Drop this packet. 3706 */ 3707 if (!allocerr_warned) { 3708 VHOST_LOG_DATA(dev->ifname, ERR, 3709 "%s: Failed mbuf alloc of size %d from %s\n", 3710 __func__, buf_len, mbuf_pool->name); 3711 allocerr_warned = true; 3712 } 3713 dropped = true; 3714 slot_idx--; 3715 break; 3716 } 3717 3718 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3719 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3720 legacy_ol_flags, slot_idx, true); 3721 if (unlikely(err)) { 3722 if (!allocerr_warned) { 3723 VHOST_LOG_DATA(dev->ifname, ERR, 3724 "%s: Failed to offload copies to async channel.\n", 3725 __func__); 3726 allocerr_warned = true; 3727 } 3728 dropped = true; 3729 break; 3730 } 3731 3732 pkts_info[slot_idx].mbuf = pkt; 3733 3734 /* store used descs */ 3735 to = async->desc_idx_split & (vq->size - 1); 3736 async->descs_split[to].id = head_idx; 3737 async->descs_split[to].len = 0; 3738 async->desc_idx_split++; 3739 3740 vq->last_avail_idx++; 3741 } 3742 3743 if (unlikely(dropped)) 3744 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3745 3746 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3747 async->iov_iter, pkt_idx); 3748 3749 async->pkts_inflight_n += n_xfer; 3750 3751 pkt_err = pkt_idx - n_xfer; 3752 if (unlikely(pkt_err)) { 3753 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n", 3754 __func__); 3755 3756 pkt_idx = n_xfer; 3757 /* recover available ring */ 3758 vq->last_avail_idx -= pkt_err; 3759 3760 /** 3761 * recover async channel copy related structures and free pktmbufs 3762 * for error pkts. 
3763 */ 3764 async->desc_idx_split -= pkt_err; 3765 while (pkt_err-- > 0) { 3766 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3767 slot_idx--; 3768 } 3769 } 3770 3771 async->pkts_idx += pkt_idx; 3772 if (async->pkts_idx >= vq->size) 3773 async->pkts_idx -= vq->size; 3774 3775 out: 3776 /* DMA device may serve other queues, unconditionally check completed. */ 3777 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3778 dma_id, vchan_id, legacy_ol_flags); 3779 3780 return nr_done_pkts; 3781 } 3782 3783 __rte_noinline 3784 static uint16_t 3785 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3786 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3787 struct rte_mbuf **pkts, uint16_t count, 3788 int16_t dma_id, uint16_t vchan_id) 3789 { 3790 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3791 pkts, count, dma_id, vchan_id, true); 3792 } 3793 3794 __rte_noinline 3795 static uint16_t 3796 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3797 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3798 struct rte_mbuf **pkts, uint16_t count, 3799 int16_t dma_id, uint16_t vchan_id) 3800 { 3801 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3802 pkts, count, dma_id, vchan_id, false); 3803 } 3804 3805 static __rte_always_inline void 3806 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 3807 uint16_t buf_id, uint16_t count) 3808 { 3809 struct vhost_async *async = vq->async; 3810 uint16_t idx = async->buffer_idx_packed; 3811 3812 async->buffers_packed[idx].id = buf_id; 3813 async->buffers_packed[idx].len = 0; 3814 async->buffers_packed[idx].count = count; 3815 3816 async->buffer_idx_packed++; 3817 if (async->buffer_idx_packed >= vq->size) 3818 async->buffer_idx_packed -= vq->size; 3819 3820 } 3821 3822 static __rte_always_inline int 3823 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3824 struct vhost_virtqueue *vq, 3825 struct rte_mempool *mbuf_pool, 3826 struct rte_mbuf *pkts, 3827 uint16_t slot_idx, 3828 bool legacy_ol_flags) 3829 { 3830 int err; 3831 uint16_t buf_id, desc_count = 0; 3832 uint16_t nr_vec = 0; 3833 uint32_t buf_len; 3834 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3835 struct vhost_async *async = vq->async; 3836 struct async_inflight_info *pkts_info = async->pkts_info; 3837 static bool allocerr_warned; 3838 3839 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3840 buf_vec, &nr_vec, &buf_id, &buf_len, 3841 VHOST_ACCESS_RO) < 0)) 3842 return -1; 3843 3844 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3845 if (!allocerr_warned) { 3846 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n", 3847 buf_len, mbuf_pool->name); 3848 3849 allocerr_warned = true; 3850 } 3851 return -1; 3852 } 3853 3854 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3855 legacy_ol_flags, slot_idx, true); 3856 if (unlikely(err)) { 3857 rte_pktmbuf_free(pkts); 3858 if (!allocerr_warned) { 3859 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf on.\n"); 3860 allocerr_warned = true; 3861 } 3862 return -1; 3863 } 3864 3865 pkts_info[slot_idx].descs = desc_count; 3866 3867 /* update async shadow packed ring */ 3868 vhost_async_shadow_dequeue_single_packed(vq, buf_id, desc_count); 3869 3870 vq_inc_last_avail_packed(vq, desc_count); 3871 3872 return err; 3873 } 3874 3875 static __rte_always_inline int 3876 virtio_dev_tx_async_packed_batch(struct virtio_net *dev, 3877 struct vhost_virtqueue *vq, 3878 struct rte_mbuf **pkts, 
uint16_t slot_idx, 3879 uint16_t dma_id, uint16_t vchan_id) 3880 { 3881 uint16_t avail_idx = vq->last_avail_idx; 3882 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3883 struct vhost_async *async = vq->async; 3884 struct async_inflight_info *pkts_info = async->pkts_info; 3885 struct virtio_net_hdr *hdr; 3886 uint32_t mbuf_offset = 0; 3887 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3888 uint64_t desc_vva; 3889 uint64_t lens[PACKED_BATCH_SIZE]; 3890 void *host_iova[PACKED_BATCH_SIZE]; 3891 uint64_t mapped_len[PACKED_BATCH_SIZE]; 3892 uint16_t ids[PACKED_BATCH_SIZE]; 3893 uint16_t i; 3894 3895 if (vhost_async_tx_batch_packed_check(dev, vq, pkts, avail_idx, 3896 desc_addrs, lens, ids, dma_id, vchan_id)) 3897 return -1; 3898 3899 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3900 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3901 3902 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3903 host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev, 3904 desc_addrs[i] + buf_offset, pkts[i]->pkt_len, &mapped_len[i]); 3905 } 3906 3907 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3908 async_iter_initialize(dev, async); 3909 async_iter_add_iovec(dev, async, 3910 host_iova[i], 3911 (void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset), 3912 mapped_len[i]); 3913 async->iter_idx++; 3914 } 3915 3916 if (virtio_net_with_host_offload(dev)) { 3917 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3918 desc_vva = vhost_iova_to_vva(dev, vq, desc_addrs[i], 3919 &lens[i], VHOST_ACCESS_RO); 3920 hdr = (struct virtio_net_hdr *)(uintptr_t)desc_vva; 3921 pkts_info[slot_idx + i].nethdr = *hdr; 3922 } 3923 } 3924 3925 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3926 3927 vhost_async_shadow_dequeue_packed_batch(vq, ids); 3928 3929 return 0; 3930 } 3931 3932 static __rte_always_inline uint16_t 3933 virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3934 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 3935 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3936 { 3937 uint32_t pkt_idx = 0; 3938 uint16_t slot_idx = 0; 3939 uint16_t nr_done_pkts = 0; 3940 uint16_t pkt_err = 0; 3941 uint32_t n_xfer; 3942 uint16_t i; 3943 struct vhost_async *async = vq->async; 3944 struct async_inflight_info *pkts_info = async->pkts_info; 3945 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3946 3947 VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); 3948 3949 async_iter_reset(async); 3950 3951 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3952 goto out; 3953 3954 do { 3955 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3956 3957 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3958 3959 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 3960 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3961 if (!virtio_dev_tx_async_packed_batch(dev, vq, &pkts_prealloc[pkt_idx], 3962 slot_idx, dma_id, vchan_id)) { 3963 for (i = 0; i < PACKED_BATCH_SIZE; i++) { 3964 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 3965 pkts_info[slot_idx].descs = 1; 3966 pkts_info[slot_idx].nr_buffers = 1; 3967 pkts_info[slot_idx].mbuf = pkts_prealloc[pkt_idx]; 3968 pkt_idx++; 3969 } 3970 continue; 3971 } 3972 } 3973 3974 if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt, 3975 slot_idx, legacy_ol_flags))) { 3976 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3977 3978 if (slot_idx == 0) 3979 slot_idx = vq->size - 1; 3980 else 3981 slot_idx--; 3982 3983 break; 3984 } 3985 
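		/*
		 * Record the mbuf for this in-flight slot; once the DMA copy
		 * completes, async_poll_dequeue_completed() returns it to the
		 * caller and applies offloads from the virtio-net header saved
		 * in pkts_info[slot_idx].nethdr.
		 */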
3986 pkts_info[slot_idx].mbuf = pkt; 3987 pkt_idx++; 3988 } while (pkt_idx < count); 3989 3990 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3991 async->iov_iter, pkt_idx); 3992 3993 async->pkts_inflight_n += n_xfer; 3994 3995 pkt_err = pkt_idx - n_xfer; 3996 3997 if (unlikely(pkt_err)) { 3998 uint16_t descs_err = 0; 3999 4000 pkt_idx -= pkt_err; 4001 4002 /** 4003 * recover DMA-copy related structures and free pktmbuf for DMA-error pkts. 4004 */ 4005 if (async->buffer_idx_packed >= pkt_err) 4006 async->buffer_idx_packed -= pkt_err; 4007 else 4008 async->buffer_idx_packed += vq->size - pkt_err; 4009 4010 while (pkt_err-- > 0) { 4011 rte_pktmbuf_free(pkts_info[slot_idx].mbuf); 4012 descs_err += pkts_info[slot_idx].descs; 4013 4014 if (slot_idx == 0) 4015 slot_idx = vq->size - 1; 4016 else 4017 slot_idx--; 4018 } 4019 4020 /* recover available ring */ 4021 if (vq->last_avail_idx >= descs_err) { 4022 vq->last_avail_idx -= descs_err; 4023 } else { 4024 vq->last_avail_idx += vq->size - descs_err; 4025 vq->avail_wrap_counter ^= 1; 4026 } 4027 } 4028 4029 async->pkts_idx += pkt_idx; 4030 if (async->pkts_idx >= vq->size) 4031 async->pkts_idx -= vq->size; 4032 4033 out: 4034 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count, 4035 dma_id, vchan_id, legacy_ol_flags); 4036 4037 return nr_done_pkts; 4038 } 4039 4040 __rte_noinline 4041 static uint16_t 4042 virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, 4043 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 4044 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 4045 { 4046 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 4047 pkts, count, dma_id, vchan_id, true); 4048 } 4049 4050 __rte_noinline 4051 static uint16_t 4052 virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, 4053 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, 4054 uint16_t count, uint16_t dma_id, uint16_t vchan_id) 4055 { 4056 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, 4057 pkts, count, dma_id, vchan_id, false); 4058 } 4059 4060 uint16_t 4061 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, 4062 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 4063 int *nr_inflight, int16_t dma_id, uint16_t vchan_id) 4064 { 4065 struct virtio_net *dev; 4066 struct rte_mbuf *rarp_mbuf = NULL; 4067 struct vhost_virtqueue *vq; 4068 int16_t success = 1; 4069 4070 dev = get_device(vid); 4071 if (!dev || !nr_inflight) 4072 return 0; 4073 4074 *nr_inflight = -1; 4075 4076 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 4077 VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n", 4078 __func__); 4079 return 0; 4080 } 4081 4082 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 4083 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 4084 __func__, queue_id); 4085 return 0; 4086 } 4087 4088 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 4089 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 4090 __func__, dma_id); 4091 return 0; 4092 } 4093 4094 if (unlikely(!dma_copy_track[dma_id].vchans || 4095 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 4096 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 4097 __func__, dma_id, vchan_id); 4098 return 0; 4099 } 4100 4101 vq = dev->virtqueue[queue_id]; 4102 4103 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) 4104 return 0; 4105 4106 if 
(unlikely(vq->enabled == 0)) { 4107 count = 0; 4108 goto out_access_unlock; 4109 } 4110 4111 if (unlikely(!vq->async)) { 4112 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n", 4113 __func__, queue_id); 4114 count = 0; 4115 goto out_access_unlock; 4116 } 4117 4118 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 4119 vhost_user_iotlb_rd_lock(vq); 4120 4121 if (unlikely(vq->access_ok == 0)) 4122 if (unlikely(vring_translate(dev, vq) < 0)) { 4123 count = 0; 4124 goto out; 4125 } 4126 4127 /* 4128 * Construct a RARP broadcast packet, and inject it to the "pkts" 4129 * array, to looks like that guest actually send such packet. 4130 * 4131 * Check user_send_rarp() for more information. 4132 * 4133 * broadcast_rarp shares a cacheline in the virtio_net structure 4134 * with some fields that are accessed during enqueue and 4135 * __atomic_compare_exchange_n causes a write if performed compare 4136 * and exchange. This could result in false sharing between enqueue 4137 * and dequeue. 4138 * 4139 * Prevent unnecessary false sharing by reading broadcast_rarp first 4140 * and only performing compare and exchange if the read indicates it 4141 * is likely to be set. 4142 */ 4143 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && 4144 __atomic_compare_exchange_n(&dev->broadcast_rarp, 4145 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { 4146 4147 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 4148 if (rarp_mbuf == NULL) { 4149 VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n"); 4150 count = 0; 4151 goto out; 4152 } 4153 /* 4154 * Inject it to the head of "pkts" array, so that switch's mac 4155 * learning table will get updated first. 4156 */ 4157 pkts[0] = rarp_mbuf; 4158 vhost_queue_stats_update(dev, vq, pkts, 1); 4159 pkts++; 4160 count -= 1; 4161 } 4162 4163 if (vq_is_packed(dev)) { 4164 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 4165 count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool, 4166 pkts, count, dma_id, vchan_id); 4167 else 4168 count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool, 4169 pkts, count, dma_id, vchan_id); 4170 } else { 4171 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 4172 count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool, 4173 pkts, count, dma_id, vchan_id); 4174 else 4175 count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool, 4176 pkts, count, dma_id, vchan_id); 4177 } 4178 4179 *nr_inflight = vq->async->pkts_inflight_n; 4180 vhost_queue_stats_update(dev, vq, pkts, count); 4181 4182 out: 4183 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 4184 vhost_user_iotlb_rd_unlock(vq); 4185 4186 out_access_unlock: 4187 rte_spinlock_unlock(&vq->access_lock); 4188 4189 if (unlikely(rarp_mbuf != NULL)) 4190 count += 1; 4191 4192 return count; 4193 } 4194
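/*
 * Illustrative usage sketch (not part of the library): a forwarding core
 * could drive the async data path above roughly as below. The vid,
 * virtqueue indices, dma_id/vchan_id, mbuf_pool and burst size are
 * application-side assumptions, and the DMA device plus the async channel
 * (rte_vhost_async_channel_register()) are assumed to be configured
 * beforehand. Mbufs handed to rte_vhost_submit_enqueue_burst() must not be
 * freed or reused until rte_vhost_poll_enqueue_completed() returns them.
 *
 *	struct rte_mbuf *deq[MAX_PKT_BURST], *done[MAX_PKT_BURST];
 *	int nr_inflight;
 *	uint16_t n, n_done;
 *
 *	(guest TX -> host: the first TX virtqueue has index 1)
 *	n = rte_vhost_async_try_dequeue_burst(vid, 1, mbuf_pool, deq,
 *			MAX_PKT_BURST, &nr_inflight, dma_id, 0);
 *
 *	(host -> guest RX: the first RX virtqueue has index 0)
 *	n = rte_vhost_submit_enqueue_burst(vid, 0, deq, n, dma_id, 0);
 *
 *	(reclaim mbufs whose DMA copies have completed)
 *	n_done = rte_vhost_poll_enqueue_completed(vid, 0, done,
 *			MAX_PKT_BURST, dma_id, 0);
 *	rte_pktmbuf_free_bulk(done, n_done);
 */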