1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_dmadev.h> 15 #include <rte_vhost.h> 16 #include <rte_tcp.h> 17 #include <rte_udp.h> 18 #include <rte_sctp.h> 19 #include <rte_arp.h> 20 #include <rte_spinlock.h> 21 #include <rte_malloc.h> 22 #include <rte_vhost_async.h> 23 24 #include "iotlb.h" 25 #include "vhost.h" 26 27 #define MAX_BATCH_LEN 256 28 29 static __rte_always_inline uint16_t 30 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 31 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 32 uint16_t vchan_id, bool legacy_ol_flags); 33 34 /* DMA device copy operation tracking array. */ 35 struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX]; 36 37 static __rte_always_inline bool 38 rxvq_is_mergeable(struct virtio_net *dev) 39 { 40 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 41 } 42 43 static __rte_always_inline bool 44 virtio_net_is_inorder(struct virtio_net *dev) 45 { 46 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 47 } 48 49 static bool 50 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 51 { 52 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 53 } 54 55 static inline void 56 vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq, 57 struct rte_mbuf **pkts, uint16_t count) 58 __rte_shared_locks_required(&vq->access_lock) 59 { 60 struct virtqueue_stats *stats = &vq->stats; 61 int i; 62 63 if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED)) 64 return; 65 66 for (i = 0; i < count; i++) { 67 struct rte_ether_addr *ea; 68 struct rte_mbuf *pkt = pkts[i]; 69 uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt); 70 71 stats->packets++; 72 stats->bytes += pkt_len; 73 74 if (pkt_len == 64) { 75 stats->size_bins[1]++; 76 } else if (pkt_len > 64 && pkt_len < 1024) { 77 uint32_t bin; 78 79 /* count zeros, and offset into correct bin */ 80 bin = (sizeof(pkt_len) * 8) - rte_clz32(pkt_len) - 5; 81 stats->size_bins[bin]++; 82 } else { 83 if (pkt_len < 64) 84 stats->size_bins[0]++; 85 else if (pkt_len < 1519) 86 stats->size_bins[6]++; 87 else 88 stats->size_bins[7]++; 89 } 90 91 ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *); 92 if (rte_is_multicast_ether_addr(ea)) { 93 if (rte_is_broadcast_ether_addr(ea)) 94 stats->broadcast++; 95 else 96 stats->multicast++; 97 } 98 } 99 } 100 101 static __rte_always_inline int64_t 102 vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, 103 int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, 104 struct vhost_iov_iter *pkt) 105 __rte_shared_locks_required(&vq->access_lock) 106 { 107 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 108 uint16_t ring_mask = dma_info->ring_mask; 109 static bool vhost_async_dma_copy_log; 110 111 112 struct vhost_iovec *iov = pkt->iov; 113 int copy_idx = 0; 114 uint32_t nr_segs = pkt->nr_segs; 115 uint16_t i; 116 117 if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs) 118 return -1; 119 120 for (i = 0; i < nr_segs; i++) { 121 copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr, 122 (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); 123 /** 124 * Since all memory is pinned and DMA vChannel 125 * ring has enough space, failure should be a 126 * rare case. 
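The burst-capacity check above has already reserved ring space for every segment of this packet, so lack of ring space is not the expected cause.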
If failure happens, it means DMA 127 * device encounters serious errors; in this 128 * case, please stop async data-path and check 129 * what has happened to DMA device. 130 */ 131 if (unlikely(copy_idx < 0)) { 132 if (!vhost_async_dma_copy_log) { 133 VHOST_LOG_DATA(dev->ifname, ERR, 134 "DMA copy failed for channel %d:%u\n", 135 dma_id, vchan_id); 136 vhost_async_dma_copy_log = true; 137 } 138 return -1; 139 } 140 } 141 142 /** 143 * Only store packet completion flag address in the last copy's 144 * slot, and other slots are set to NULL. 145 */ 146 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx]; 147 148 return nr_segs; 149 } 150 151 static __rte_always_inline uint16_t 152 vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, 153 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, 154 struct vhost_iov_iter *pkts, uint16_t nr_pkts) 155 __rte_shared_locks_required(&vq->access_lock) 156 { 157 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 158 int64_t ret, nr_copies = 0; 159 uint16_t pkt_idx; 160 161 rte_spinlock_lock(&dma_info->dma_lock); 162 163 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { 164 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, 165 &pkts[pkt_idx]); 166 if (unlikely(ret < 0)) 167 break; 168 169 nr_copies += ret; 170 head_idx++; 171 if (head_idx >= vq->size) 172 head_idx -= vq->size; 173 } 174 175 if (likely(nr_copies > 0)) 176 rte_dma_submit(dma_id, vchan_id); 177 178 rte_spinlock_unlock(&dma_info->dma_lock); 179 180 return pkt_idx; 181 } 182 183 static __rte_always_inline uint16_t 184 vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id, 185 uint16_t max_pkts) 186 { 187 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; 188 uint16_t ring_mask = dma_info->ring_mask; 189 uint16_t last_idx = 0; 190 uint16_t nr_copies; 191 uint16_t copy_idx; 192 uint16_t i; 193 bool has_error = false; 194 static bool vhost_async_dma_complete_log; 195 196 rte_spinlock_lock(&dma_info->dma_lock); 197 198 /** 199 * Print error log for debugging, if DMA reports error during 200 * DMA transfer. We do not handle error in vhost level. 201 */ 202 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error); 203 if (unlikely(!vhost_async_dma_complete_log && has_error)) { 204 VHOST_LOG_DATA(dev->ifname, ERR, 205 "DMA completion failure on channel %d:%u\n", 206 dma_id, vchan_id); 207 vhost_async_dma_complete_log = true; 208 } else if (nr_copies == 0) { 209 goto out; 210 } 211 212 copy_idx = last_idx - nr_copies + 1; 213 for (i = 0; i < nr_copies; i++) { 214 bool *flag; 215 216 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask]; 217 if (flag) { 218 /** 219 * Mark the packet flag as received. The flag 220 * could belong to another virtqueue but write 221 * is atomic. 
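The flag address was stored only in the slot of the packet's final copy (see vhost_async_dma_transfer_one()), and completions are returned in ring order, so a non-NULL slot here means every segment of that packet has been copied.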
222 */ 223 *flag = true; 224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL; 225 } 226 copy_idx++; 227 } 228 229 out: 230 rte_spinlock_unlock(&dma_info->dma_lock); 231 return nr_copies; 232 } 233 234 static inline void 235 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 236 __rte_shared_locks_required(&vq->iotlb_lock) 237 { 238 struct batch_copy_elem *elem = vq->batch_copy_elems; 239 uint16_t count = vq->batch_copy_nb_elems; 240 int i; 241 242 for (i = 0; i < count; i++) { 243 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 244 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 245 elem[i].len); 246 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 247 } 248 249 vq->batch_copy_nb_elems = 0; 250 } 251 252 static inline void 253 do_data_copy_dequeue(struct vhost_virtqueue *vq) 254 { 255 struct batch_copy_elem *elem = vq->batch_copy_elems; 256 uint16_t count = vq->batch_copy_nb_elems; 257 int i; 258 259 for (i = 0; i < count; i++) 260 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 261 262 vq->batch_copy_nb_elems = 0; 263 } 264 265 static __rte_always_inline void 266 do_flush_shadow_used_ring_split(struct virtio_net *dev, 267 struct vhost_virtqueue *vq, 268 uint16_t to, uint16_t from, uint16_t size) 269 { 270 rte_memcpy(&vq->used->ring[to], 271 &vq->shadow_used_split[from], 272 size * sizeof(struct vring_used_elem)); 273 vhost_log_cache_used_vring(dev, vq, 274 offsetof(struct vring_used, ring[to]), 275 size * sizeof(struct vring_used_elem)); 276 } 277 278 static __rte_always_inline void 279 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 280 { 281 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 282 283 if (used_idx + vq->shadow_used_idx <= vq->size) { 284 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 285 vq->shadow_used_idx); 286 } else { 287 uint16_t size; 288 289 /* update used ring interval [used_idx, vq->size] */ 290 size = vq->size - used_idx; 291 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 292 293 /* update the left half used ring interval [0, left_size] */ 294 do_flush_shadow_used_ring_split(dev, vq, 0, size, 295 vq->shadow_used_idx - size); 296 } 297 vq->last_used_idx += vq->shadow_used_idx; 298 299 vhost_log_cache_sync(dev, vq); 300 301 rte_atomic_fetch_add_explicit((unsigned short __rte_atomic *)&vq->used->idx, 302 vq->shadow_used_idx, rte_memory_order_release); 303 vq->shadow_used_idx = 0; 304 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 305 sizeof(vq->used->idx)); 306 } 307 308 static __rte_always_inline void 309 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 310 uint16_t desc_idx, uint32_t len) 311 { 312 uint16_t i = vq->shadow_used_idx++; 313 314 vq->shadow_used_split[i].id = desc_idx; 315 vq->shadow_used_split[i].len = len; 316 } 317 318 static __rte_always_inline void 319 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 320 struct vhost_virtqueue *vq) 321 { 322 int i; 323 uint16_t used_idx = vq->last_used_idx; 324 uint16_t head_idx = vq->last_used_idx; 325 uint16_t head_flags = 0; 326 327 /* Split loop in two to save memory barriers */ 328 for (i = 0; i < vq->shadow_used_idx; i++) { 329 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 330 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 331 332 used_idx += vq->shadow_used_packed[i].count; 333 if (used_idx >= vq->size) 334 used_idx -= vq->size; 335 } 336 337 /* The ordering for storing desc flags needs to be enforced. 
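The release fence below makes the id/len stores above visible before the flags stores that follow; the driver reads flags first, so it never observes a descriptor whose id/len have not yet been written.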
*/ 338 rte_atomic_thread_fence(rte_memory_order_release); 339 340 for (i = 0; i < vq->shadow_used_idx; i++) { 341 uint16_t flags; 342 343 if (vq->shadow_used_packed[i].len) 344 flags = VRING_DESC_F_WRITE; 345 else 346 flags = 0; 347 348 if (vq->used_wrap_counter) { 349 flags |= VRING_DESC_F_USED; 350 flags |= VRING_DESC_F_AVAIL; 351 } else { 352 flags &= ~VRING_DESC_F_USED; 353 flags &= ~VRING_DESC_F_AVAIL; 354 } 355 356 if (i > 0) { 357 vq->desc_packed[vq->last_used_idx].flags = flags; 358 359 vhost_log_cache_used_vring(dev, vq, 360 vq->last_used_idx * 361 sizeof(struct vring_packed_desc), 362 sizeof(struct vring_packed_desc)); 363 } else { 364 head_idx = vq->last_used_idx; 365 head_flags = flags; 366 } 367 368 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 369 } 370 371 vq->desc_packed[head_idx].flags = head_flags; 372 373 vhost_log_cache_used_vring(dev, vq, 374 head_idx * 375 sizeof(struct vring_packed_desc), 376 sizeof(struct vring_packed_desc)); 377 378 vq->shadow_used_idx = 0; 379 vhost_log_cache_sync(dev, vq); 380 } 381 382 static __rte_always_inline void 383 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 384 struct vhost_virtqueue *vq) 385 { 386 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 387 388 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 389 /* desc flags is the synchronization point for virtio packed vring */ 390 rte_atomic_store_explicit( 391 (unsigned short __rte_atomic *)&vq->desc_packed[vq->shadow_last_used_idx].flags, 392 used_elem->flags, rte_memory_order_release); 393 394 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 395 sizeof(struct vring_packed_desc), 396 sizeof(struct vring_packed_desc)); 397 vq->shadow_used_idx = 0; 398 vhost_log_cache_sync(dev, vq); 399 } 400 401 static __rte_always_inline void 402 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 403 struct vhost_virtqueue *vq, 404 uint64_t *lens, 405 uint16_t *ids) 406 { 407 uint16_t i; 408 uint16_t flags; 409 uint16_t last_used_idx; 410 struct vring_packed_desc *desc_base; 411 412 last_used_idx = vq->last_used_idx; 413 desc_base = &vq->desc_packed[last_used_idx]; 414 415 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 416 417 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 418 desc_base[i].id = ids[i]; 419 desc_base[i].len = lens[i]; 420 } 421 422 rte_atomic_thread_fence(rte_memory_order_release); 423 424 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 425 desc_base[i].flags = flags; 426 } 427 428 vhost_log_cache_used_vring(dev, vq, last_used_idx * 429 sizeof(struct vring_packed_desc), 430 sizeof(struct vring_packed_desc) * 431 PACKED_BATCH_SIZE); 432 vhost_log_cache_sync(dev, vq); 433 434 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 435 } 436 437 static __rte_always_inline void 438 vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq, 439 uint64_t *lens, 440 uint16_t *ids) 441 __rte_exclusive_locks_required(&vq->access_lock) 442 { 443 uint16_t i; 444 struct vhost_async *async = vq->async; 445 446 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 447 async->buffers_packed[async->buffer_idx_packed].id = ids[i]; 448 async->buffers_packed[async->buffer_idx_packed].len = lens[i]; 449 async->buffers_packed[async->buffer_idx_packed].count = 1; 450 async->buffer_idx_packed++; 451 if (async->buffer_idx_packed >= vq->size) 452 async->buffer_idx_packed -= vq->size; 453 } 454 } 455 456 static __rte_always_inline void 457 vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, 
uint16_t *ids) 458 __rte_shared_locks_required(&vq->access_lock) 459 { 460 uint16_t i; 461 struct vhost_async *async = vq->async; 462 463 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 464 async->buffers_packed[async->buffer_idx_packed].id = ids[i]; 465 async->buffers_packed[async->buffer_idx_packed].len = 0; 466 async->buffers_packed[async->buffer_idx_packed].count = 1; 467 468 async->buffer_idx_packed++; 469 if (async->buffer_idx_packed >= vq->size) 470 async->buffer_idx_packed -= vq->size; 471 } 472 } 473 474 static __rte_always_inline void 475 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 476 uint16_t id) 477 { 478 vq->shadow_used_packed[0].id = id; 479 480 if (!vq->shadow_used_idx) { 481 vq->shadow_last_used_idx = vq->last_used_idx; 482 vq->shadow_used_packed[0].flags = 483 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 484 vq->shadow_used_packed[0].len = 0; 485 vq->shadow_used_packed[0].count = 1; 486 vq->shadow_used_idx++; 487 } 488 489 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 490 } 491 492 static __rte_always_inline void 493 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 494 struct vhost_virtqueue *vq, 495 uint16_t *ids) 496 { 497 uint16_t flags; 498 uint16_t i; 499 uint16_t begin; 500 501 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 502 503 if (!vq->shadow_used_idx) { 504 vq->shadow_last_used_idx = vq->last_used_idx; 505 vq->shadow_used_packed[0].id = ids[0]; 506 vq->shadow_used_packed[0].len = 0; 507 vq->shadow_used_packed[0].count = 1; 508 vq->shadow_used_packed[0].flags = flags; 509 vq->shadow_used_idx++; 510 begin = 1; 511 } else 512 begin = 0; 513 514 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 515 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 516 vq->desc_packed[vq->last_used_idx + i].len = 0; 517 } 518 519 rte_atomic_thread_fence(rte_memory_order_release); 520 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 521 vq->desc_packed[vq->last_used_idx + i].flags = flags; 522 523 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 524 sizeof(struct vring_packed_desc), 525 sizeof(struct vring_packed_desc) * 526 PACKED_BATCH_SIZE); 527 vhost_log_cache_sync(dev, vq); 528 529 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 530 } 531 532 static __rte_always_inline void 533 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 534 uint16_t buf_id, 535 uint16_t count) 536 { 537 uint16_t flags; 538 539 flags = vq->desc_packed[vq->last_used_idx].flags; 540 if (vq->used_wrap_counter) { 541 flags |= VRING_DESC_F_USED; 542 flags |= VRING_DESC_F_AVAIL; 543 } else { 544 flags &= ~VRING_DESC_F_USED; 545 flags &= ~VRING_DESC_F_AVAIL; 546 } 547 548 if (!vq->shadow_used_idx) { 549 vq->shadow_last_used_idx = vq->last_used_idx; 550 551 vq->shadow_used_packed[0].id = buf_id; 552 vq->shadow_used_packed[0].len = 0; 553 vq->shadow_used_packed[0].flags = flags; 554 vq->shadow_used_idx++; 555 } else { 556 vq->desc_packed[vq->last_used_idx].id = buf_id; 557 vq->desc_packed[vq->last_used_idx].len = 0; 558 vq->desc_packed[vq->last_used_idx].flags = flags; 559 } 560 561 vq_inc_last_used_packed(vq, count); 562 } 563 564 static __rte_always_inline void 565 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 566 uint16_t buf_id, 567 uint16_t count) 568 { 569 uint16_t flags; 570 571 vq->shadow_used_packed[0].id = buf_id; 572 573 flags = vq->desc_packed[vq->last_used_idx].flags; 574 if (vq->used_wrap_counter) { 575 flags |= VRING_DESC_F_USED; 576 flags |= VRING_DESC_F_AVAIL; 577 } else { 
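/* used wrap counter is clear: a used descriptor must have both AVAIL and USED cleared to match it */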
578 flags &= ~VRING_DESC_F_USED; 579 flags &= ~VRING_DESC_F_AVAIL; 580 } 581 582 if (!vq->shadow_used_idx) { 583 vq->shadow_last_used_idx = vq->last_used_idx; 584 vq->shadow_used_packed[0].len = 0; 585 vq->shadow_used_packed[0].flags = flags; 586 vq->shadow_used_idx++; 587 } 588 589 vq_inc_last_used_packed(vq, count); 590 } 591 592 static __rte_always_inline void 593 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 594 uint32_t *len, 595 uint16_t *id, 596 uint16_t *count, 597 uint16_t num_buffers) 598 { 599 uint16_t i; 600 601 for (i = 0; i < num_buffers; i++) { 602 /* enqueue shadow flush action aligned with batch num */ 603 if (!vq->shadow_used_idx) 604 vq->shadow_aligned_idx = vq->last_used_idx & 605 PACKED_BATCH_MASK; 606 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 607 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 608 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 609 vq->shadow_aligned_idx += count[i]; 610 vq->shadow_used_idx++; 611 } 612 } 613 614 static __rte_always_inline void 615 vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq, 616 uint32_t *len, 617 uint16_t *id, 618 uint16_t *count, 619 uint16_t num_buffers) 620 __rte_exclusive_locks_required(&vq->access_lock) 621 { 622 uint16_t i; 623 struct vhost_async *async = vq->async; 624 625 for (i = 0; i < num_buffers; i++) { 626 async->buffers_packed[async->buffer_idx_packed].id = id[i]; 627 async->buffers_packed[async->buffer_idx_packed].len = len[i]; 628 async->buffers_packed[async->buffer_idx_packed].count = count[i]; 629 async->buffer_idx_packed++; 630 if (async->buffer_idx_packed >= vq->size) 631 async->buffer_idx_packed -= vq->size; 632 } 633 } 634 635 static __rte_always_inline void 636 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 637 struct vhost_virtqueue *vq, 638 uint32_t *len, 639 uint16_t *id, 640 uint16_t *count, 641 uint16_t num_buffers) 642 __rte_shared_locks_required(&vq->iotlb_lock) 643 { 644 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 645 646 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { 647 do_data_copy_enqueue(dev, vq); 648 vhost_flush_enqueue_shadow_packed(dev, vq); 649 } 650 } 651 652 /* avoid write operation when necessary, to lessen cache issues */ 653 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 654 if ((var) != (val)) \ 655 (var) = (val); \ 656 } while (0) 657 658 static __rte_always_inline void 659 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 660 { 661 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK; 662 663 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) 664 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM; 665 666 if (csum_l4) { 667 /* 668 * Pseudo-header checksum must be set as per Virtio spec. 669 * 670 * Note: We don't propagate rte_net_intel_cksum_prepare() 671 * errors, as it would have an impact on performance, and an 672 * error would mean the packet is dropped by the guest instead 673 * of being dropped here. 
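For example, with a plain Ethernet + IPv4 + TCP packet, csum_start below becomes l2_len + l3_len = 14 + 20 = 34 and csum_offset becomes offsetof(struct rte_tcp_hdr, cksum) = 16; the pseudo-header checksum written by rte_net_intel_cksum_prepare() is then completed by the guest or its hardware.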
674 */ 675 rte_net_intel_cksum_prepare(m_buf); 676 677 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 678 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 679 680 switch (csum_l4) { 681 case RTE_MBUF_F_TX_TCP_CKSUM: 682 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 683 cksum)); 684 break; 685 case RTE_MBUF_F_TX_UDP_CKSUM: 686 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 687 dgram_cksum)); 688 break; 689 case RTE_MBUF_F_TX_SCTP_CKSUM: 690 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 691 cksum)); 692 break; 693 } 694 } else { 695 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 696 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 697 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 698 } 699 700 /* IP cksum verification cannot be bypassed, then calculate here */ 701 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 702 struct rte_ipv4_hdr *ipv4_hdr; 703 704 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 705 m_buf->l2_len); 706 ipv4_hdr->hdr_checksum = 0; 707 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 708 } 709 710 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 711 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 712 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 713 else 714 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 715 net_hdr->gso_size = m_buf->tso_segsz; 716 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 717 + m_buf->l4_len; 718 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 719 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 720 net_hdr->gso_size = m_buf->tso_segsz; 721 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 722 m_buf->l4_len; 723 } else { 724 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 725 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 726 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 727 } 728 } 729 730 static __rte_always_inline int 731 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 732 struct buf_vector *buf_vec, uint16_t *vec_idx, 733 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 734 __rte_shared_locks_required(&vq->iotlb_lock) 735 { 736 uint16_t vec_id = *vec_idx; 737 738 while (desc_len) { 739 uint64_t desc_addr; 740 uint64_t desc_chunck_len = desc_len; 741 742 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 743 return -1; 744 745 desc_addr = vhost_iova_to_vva(dev, vq, 746 desc_iova, 747 &desc_chunck_len, 748 perm); 749 if (unlikely(!desc_addr)) 750 return -1; 751 752 rte_prefetch0((void *)(uintptr_t)desc_addr); 753 754 buf_vec[vec_id].buf_iova = desc_iova; 755 buf_vec[vec_id].buf_addr = desc_addr; 756 buf_vec[vec_id].buf_len = desc_chunck_len; 757 758 desc_len -= desc_chunck_len; 759 desc_iova += desc_chunck_len; 760 vec_id++; 761 } 762 *vec_idx = vec_id; 763 764 return 0; 765 } 766 767 static __rte_always_inline int 768 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 769 uint32_t avail_idx, uint16_t *vec_idx, 770 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 771 uint32_t *desc_chain_len, uint8_t perm) 772 __rte_shared_locks_required(&vq->iotlb_lock) 773 { 774 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 775 uint16_t vec_id = *vec_idx; 776 uint32_t len = 0; 777 uint64_t dlen; 778 uint32_t nr_descs = vq->size; 779 uint32_t cnt = 0; 780 struct vring_desc *descs = vq->desc; 781 struct vring_desc *idesc = NULL; 782 783 if (unlikely(idx >= vq->size)) 784 return -1; 785 786 *desc_chain_head = idx; 787 788 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 789 dlen = vq->desc[idx].len; 790 nr_descs = dlen / sizeof(struct vring_desc); 791 if (unlikely(nr_descs > vq->size)) 792 return 
-1; 793 794 descs = (struct vring_desc *)(uintptr_t) 795 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 796 &dlen, 797 VHOST_ACCESS_RO); 798 if (unlikely(!descs)) 799 return -1; 800 801 if (unlikely(dlen < vq->desc[idx].len)) { 802 /* 803 * The indirect desc table is not contiguous 804 * in process VA space, we have to copy it. 805 */ 806 idesc = vhost_alloc_copy_ind_table(dev, vq, 807 vq->desc[idx].addr, vq->desc[idx].len); 808 if (unlikely(!idesc)) 809 return -1; 810 811 descs = idesc; 812 } 813 814 idx = 0; 815 } 816 817 while (1) { 818 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 819 free_ind_table(idesc); 820 return -1; 821 } 822 823 dlen = descs[idx].len; 824 len += dlen; 825 826 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 827 descs[idx].addr, dlen, 828 perm))) { 829 free_ind_table(idesc); 830 return -1; 831 } 832 833 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 834 break; 835 836 idx = descs[idx].next; 837 } 838 839 *desc_chain_len = len; 840 *vec_idx = vec_id; 841 842 if (unlikely(!!idesc)) 843 free_ind_table(idesc); 844 845 return 0; 846 } 847 848 /* 849 * Returns -1 on fail, 0 on success 850 */ 851 static inline int 852 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 853 uint64_t size, struct buf_vector *buf_vec, 854 uint16_t *num_buffers, uint16_t avail_head, 855 uint16_t *nr_vec) 856 __rte_shared_locks_required(&vq->iotlb_lock) 857 { 858 uint16_t cur_idx; 859 uint16_t vec_idx = 0; 860 uint16_t max_tries, tries = 0; 861 862 uint16_t head_idx = 0; 863 uint32_t len = 0; 864 865 *num_buffers = 0; 866 cur_idx = vq->last_avail_idx; 867 868 if (rxvq_is_mergeable(dev)) 869 max_tries = vq->size - 1; 870 else 871 max_tries = 1; 872 873 while (size > 0) { 874 if (unlikely(cur_idx == avail_head)) 875 return -1; 876 /* 877 * if we tried all available ring items, and still 878 * can't get enough buf, it means something abnormal 879 * happened. 880 */ 881 if (unlikely(++tries > max_tries)) 882 return -1; 883 884 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 885 &vec_idx, buf_vec, 886 &head_idx, &len, 887 VHOST_ACCESS_RW) < 0)) 888 return -1; 889 len = RTE_MIN(len, size); 890 update_shadow_used_ring_split(vq, head_idx, len); 891 size -= len; 892 893 cur_idx++; 894 *num_buffers += 1; 895 } 896 897 *nr_vec = vec_idx; 898 899 return 0; 900 } 901 902 static __rte_always_inline int 903 fill_vec_buf_packed_indirect(struct virtio_net *dev, 904 struct vhost_virtqueue *vq, 905 struct vring_packed_desc *desc, uint16_t *vec_idx, 906 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 907 __rte_shared_locks_required(&vq->iotlb_lock) 908 { 909 uint16_t i; 910 uint32_t nr_descs; 911 uint16_t vec_id = *vec_idx; 912 uint64_t dlen; 913 struct vring_packed_desc *descs, *idescs = NULL; 914 915 dlen = desc->len; 916 descs = (struct vring_packed_desc *)(uintptr_t) 917 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 918 if (unlikely(!descs)) 919 return -1; 920 921 if (unlikely(dlen < desc->len)) { 922 /* 923 * The indirect desc table is not contiguous 924 * in process VA space, we have to copy it. 
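The temporary copy is released via free_ind_table() once the chain has been parsed.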
925 */ 926 idescs = vhost_alloc_copy_ind_table(dev, 927 vq, desc->addr, desc->len); 928 if (unlikely(!idescs)) 929 return -1; 930 931 descs = idescs; 932 } 933 934 nr_descs = desc->len / sizeof(struct vring_packed_desc); 935 if (unlikely(nr_descs >= vq->size)) { 936 free_ind_table(idescs); 937 return -1; 938 } 939 940 for (i = 0; i < nr_descs; i++) { 941 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 942 free_ind_table(idescs); 943 return -1; 944 } 945 946 dlen = descs[i].len; 947 *len += dlen; 948 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 949 descs[i].addr, dlen, 950 perm))) 951 return -1; 952 } 953 *vec_idx = vec_id; 954 955 if (unlikely(!!idescs)) 956 free_ind_table(idescs); 957 958 return 0; 959 } 960 961 static __rte_always_inline int 962 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 963 uint16_t avail_idx, uint16_t *desc_count, 964 struct buf_vector *buf_vec, uint16_t *vec_idx, 965 uint16_t *buf_id, uint32_t *len, uint8_t perm) 966 __rte_shared_locks_required(&vq->iotlb_lock) 967 { 968 bool wrap_counter = vq->avail_wrap_counter; 969 struct vring_packed_desc *descs = vq->desc_packed; 970 uint16_t vec_id = *vec_idx; 971 uint64_t dlen; 972 973 if (avail_idx < vq->last_avail_idx) 974 wrap_counter ^= 1; 975 976 /* 977 * Perform a load-acquire barrier in desc_is_avail to 978 * enforce the ordering between desc flags and desc 979 * content. 980 */ 981 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 982 return -1; 983 984 *desc_count = 0; 985 *len = 0; 986 987 while (1) { 988 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 989 return -1; 990 991 if (unlikely(*desc_count >= vq->size)) 992 return -1; 993 994 *desc_count += 1; 995 *buf_id = descs[avail_idx].id; 996 997 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 998 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 999 &descs[avail_idx], 1000 &vec_id, buf_vec, 1001 len, perm) < 0)) 1002 return -1; 1003 } else { 1004 dlen = descs[avail_idx].len; 1005 *len += dlen; 1006 1007 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 1008 descs[avail_idx].addr, 1009 dlen, 1010 perm))) 1011 return -1; 1012 } 1013 1014 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 1015 break; 1016 1017 if (++avail_idx >= vq->size) { 1018 avail_idx -= vq->size; 1019 wrap_counter ^= 1; 1020 } 1021 } 1022 1023 *vec_idx = vec_id; 1024 1025 return 0; 1026 } 1027 1028 static __rte_noinline void 1029 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1030 struct buf_vector *buf_vec, 1031 struct virtio_net_hdr_mrg_rxbuf *hdr) 1032 __rte_shared_locks_required(&vq->iotlb_lock) 1033 { 1034 uint64_t len; 1035 uint64_t remain = dev->vhost_hlen; 1036 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 1037 uint64_t iova = buf_vec->buf_iova; 1038 1039 while (remain) { 1040 len = RTE_MIN(remain, 1041 buf_vec->buf_len); 1042 dst = buf_vec->buf_addr; 1043 rte_memcpy((void *)(uintptr_t)dst, 1044 (void *)(uintptr_t)src, 1045 len); 1046 1047 PRINT_PACKET(dev, (uintptr_t)dst, 1048 (uint32_t)len, 0); 1049 vhost_log_cache_write_iova(dev, vq, 1050 iova, len); 1051 1052 remain -= len; 1053 iova += len; 1054 src += len; 1055 buf_vec++; 1056 } 1057 } 1058 1059 static __rte_always_inline int 1060 async_iter_initialize(struct virtio_net *dev, struct vhost_async *async) 1061 { 1062 struct vhost_iov_iter *iter; 1063 1064 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1065 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1066 return -1; 1067 } 1068 1069 iter = async->iov_iter + async->iter_idx; 1070 
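/* One iterator per packet: segments are appended from the shared iovec pool by async_iter_add_iovec(), and the iterator is closed by async_iter_finalize() on success or rolled back by async_iter_cancel() on failure. */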
iter->iov = async->iovec + async->iovec_idx; 1071 iter->nr_segs = 0; 1072 1073 return 0; 1074 } 1075 1076 static __rte_always_inline int 1077 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, 1078 void *src, void *dst, size_t len) 1079 { 1080 struct vhost_iov_iter *iter; 1081 struct vhost_iovec *iovec; 1082 1083 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) { 1084 static bool vhost_max_async_vec_log; 1085 1086 if (!vhost_max_async_vec_log) { 1087 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n"); 1088 vhost_max_async_vec_log = true; 1089 } 1090 1091 return -1; 1092 } 1093 1094 iter = async->iov_iter + async->iter_idx; 1095 iovec = async->iovec + async->iovec_idx; 1096 1097 iovec->src_addr = src; 1098 iovec->dst_addr = dst; 1099 iovec->len = len; 1100 1101 iter->nr_segs++; 1102 async->iovec_idx++; 1103 1104 return 0; 1105 } 1106 1107 static __rte_always_inline void 1108 async_iter_finalize(struct vhost_async *async) 1109 { 1110 async->iter_idx++; 1111 } 1112 1113 static __rte_always_inline void 1114 async_iter_cancel(struct vhost_async *async) 1115 { 1116 struct vhost_iov_iter *iter; 1117 1118 iter = async->iov_iter + async->iter_idx; 1119 async->iovec_idx -= iter->nr_segs; 1120 iter->nr_segs = 0; 1121 iter->iov = NULL; 1122 } 1123 1124 static __rte_always_inline void 1125 async_iter_reset(struct vhost_async *async) 1126 { 1127 async->iter_idx = 0; 1128 async->iovec_idx = 0; 1129 } 1130 1131 static __rte_always_inline int 1132 async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1133 struct rte_mbuf *m, uint32_t mbuf_offset, 1134 uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1135 __rte_shared_locks_required(&vq->access_lock) 1136 __rte_shared_locks_required(&vq->iotlb_lock) 1137 { 1138 struct vhost_async *async = vq->async; 1139 uint64_t mapped_len; 1140 uint32_t buf_offset = 0; 1141 void *src, *dst; 1142 void *host_iova; 1143 1144 while (cpy_len) { 1145 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1146 buf_iova + buf_offset, cpy_len, &mapped_len); 1147 if (unlikely(!host_iova)) { 1148 VHOST_LOG_DATA(dev->ifname, ERR, 1149 "%s: failed to get host iova.\n", 1150 __func__); 1151 return -1; 1152 } 1153 1154 if (to_desc) { 1155 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1156 dst = host_iova; 1157 } else { 1158 src = host_iova; 1159 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); 1160 } 1161 1162 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) 1163 return -1; 1164 1165 cpy_len -= (uint32_t)mapped_len; 1166 mbuf_offset += (uint32_t)mapped_len; 1167 buf_offset += (uint32_t)mapped_len; 1168 } 1169 1170 return 0; 1171 } 1172 1173 static __rte_always_inline void 1174 sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, 1175 struct rte_mbuf *m, uint32_t mbuf_offset, 1176 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) 1177 __rte_shared_locks_required(&vq->iotlb_lock) 1178 { 1179 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 1180 1181 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) { 1182 if (to_desc) { 1183 rte_memcpy((void *)((uintptr_t)(buf_addr)), 1184 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1185 cpy_len); 1186 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len); 1187 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0); 1188 } else { 1189 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1190 (void *)((uintptr_t)(buf_addr)), 1191 cpy_len); 1192 } 1193 } 
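/* copies of at most MAX_BATCH_LEN bytes (while the batch array has room) are staged in batch_copy_elems and flushed later by do_data_copy_enqueue()/do_data_copy_dequeue() */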
else { 1194 if (to_desc) { 1195 batch_copy[vq->batch_copy_nb_elems].dst = 1196 (void *)((uintptr_t)(buf_addr)); 1197 batch_copy[vq->batch_copy_nb_elems].src = 1198 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1199 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova; 1200 } else { 1201 batch_copy[vq->batch_copy_nb_elems].dst = 1202 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1203 batch_copy[vq->batch_copy_nb_elems].src = 1204 (void *)((uintptr_t)(buf_addr)); 1205 } 1206 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 1207 vq->batch_copy_nb_elems++; 1208 } 1209 } 1210 1211 static __rte_always_inline int 1212 mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 1213 struct rte_mbuf *m, struct buf_vector *buf_vec, 1214 uint16_t nr_vec, uint16_t num_buffers, bool is_async) 1215 __rte_shared_locks_required(&vq->access_lock) 1216 __rte_shared_locks_required(&vq->iotlb_lock) 1217 { 1218 uint32_t vec_idx = 0; 1219 uint32_t mbuf_offset, mbuf_avail; 1220 uint32_t buf_offset, buf_avail; 1221 uint64_t buf_addr, buf_iova, buf_len; 1222 uint32_t cpy_len; 1223 uint64_t hdr_addr; 1224 struct rte_mbuf *hdr_mbuf; 1225 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 1226 struct vhost_async *async = vq->async; 1227 1228 if (unlikely(m == NULL)) 1229 return -1; 1230 1231 buf_addr = buf_vec[vec_idx].buf_addr; 1232 buf_iova = buf_vec[vec_idx].buf_iova; 1233 buf_len = buf_vec[vec_idx].buf_len; 1234 1235 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) 1236 return -1; 1237 1238 hdr_mbuf = m; 1239 hdr_addr = buf_addr; 1240 if (unlikely(buf_len < dev->vhost_hlen)) { 1241 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1242 hdr = &tmp_hdr; 1243 } else 1244 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1245 1246 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers); 1247 1248 if (unlikely(buf_len < dev->vhost_hlen)) { 1249 buf_offset = dev->vhost_hlen - buf_len; 1250 vec_idx++; 1251 buf_addr = buf_vec[vec_idx].buf_addr; 1252 buf_iova = buf_vec[vec_idx].buf_iova; 1253 buf_len = buf_vec[vec_idx].buf_len; 1254 buf_avail = buf_len - buf_offset; 1255 } else { 1256 buf_offset = dev->vhost_hlen; 1257 buf_avail = buf_len - dev->vhost_hlen; 1258 } 1259 1260 mbuf_avail = rte_pktmbuf_data_len(m); 1261 mbuf_offset = 0; 1262 1263 if (is_async) { 1264 if (async_iter_initialize(dev, async)) 1265 return -1; 1266 } 1267 1268 while (mbuf_avail != 0 || m->next != NULL) { 1269 /* done with current buf, get the next one */ 1270 if (buf_avail == 0) { 1271 vec_idx++; 1272 if (unlikely(vec_idx >= nr_vec)) 1273 goto error; 1274 1275 buf_addr = buf_vec[vec_idx].buf_addr; 1276 buf_iova = buf_vec[vec_idx].buf_iova; 1277 buf_len = buf_vec[vec_idx].buf_len; 1278 1279 buf_offset = 0; 1280 buf_avail = buf_len; 1281 } 1282 1283 /* done with current mbuf, get the next one */ 1284 if (mbuf_avail == 0) { 1285 m = m->next; 1286 1287 mbuf_offset = 0; 1288 mbuf_avail = rte_pktmbuf_data_len(m); 1289 } 1290 1291 if (hdr_addr) { 1292 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1293 if (rxvq_is_mergeable(dev)) 1294 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1295 num_buffers); 1296 1297 if (unlikely(hdr == &tmp_hdr)) { 1298 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1299 } else { 1300 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1301 dev->vhost_hlen, 0); 1302 vhost_log_cache_write_iova(dev, vq, 1303 buf_vec[0].buf_iova, 1304 dev->vhost_hlen); 1305 } 1306 1307 hdr_addr = 0; 1308 } 1309 1310 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1311 1312 if (is_async) { 1313 if 
(async_fill_seg(dev, vq, m, mbuf_offset, 1314 buf_iova + buf_offset, cpy_len, true) < 0) 1315 goto error; 1316 } else { 1317 sync_fill_seg(dev, vq, m, mbuf_offset, 1318 buf_addr + buf_offset, 1319 buf_iova + buf_offset, cpy_len, true); 1320 } 1321 1322 mbuf_avail -= cpy_len; 1323 mbuf_offset += cpy_len; 1324 buf_avail -= cpy_len; 1325 buf_offset += cpy_len; 1326 } 1327 1328 if (is_async) 1329 async_iter_finalize(async); 1330 1331 return 0; 1332 error: 1333 if (is_async) 1334 async_iter_cancel(async); 1335 1336 return -1; 1337 } 1338 1339 static __rte_always_inline int 1340 vhost_enqueue_single_packed(struct virtio_net *dev, 1341 struct vhost_virtqueue *vq, 1342 struct rte_mbuf *pkt, 1343 struct buf_vector *buf_vec, 1344 uint16_t *nr_descs) 1345 __rte_shared_locks_required(&vq->access_lock) 1346 __rte_shared_locks_required(&vq->iotlb_lock) 1347 { 1348 uint16_t nr_vec = 0; 1349 uint16_t avail_idx = vq->last_avail_idx; 1350 uint16_t max_tries, tries = 0; 1351 uint16_t buf_id = 0; 1352 uint32_t len = 0; 1353 uint16_t desc_count; 1354 uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1355 uint16_t num_buffers = 0; 1356 uint32_t buffer_len[vq->size]; 1357 uint16_t buffer_buf_id[vq->size]; 1358 uint16_t buffer_desc_count[vq->size]; 1359 1360 if (rxvq_is_mergeable(dev)) 1361 max_tries = vq->size - 1; 1362 else 1363 max_tries = 1; 1364 1365 while (size > 0) { 1366 /* 1367 * if we tried all available ring items, and still 1368 * can't get enough buf, it means something abnormal 1369 * happened. 1370 */ 1371 if (unlikely(++tries > max_tries)) 1372 return -1; 1373 1374 if (unlikely(fill_vec_buf_packed(dev, vq, 1375 avail_idx, &desc_count, 1376 buf_vec, &nr_vec, 1377 &buf_id, &len, 1378 VHOST_ACCESS_RW) < 0)) 1379 return -1; 1380 1381 len = RTE_MIN(len, size); 1382 size -= len; 1383 1384 buffer_len[num_buffers] = len; 1385 buffer_buf_id[num_buffers] = buf_id; 1386 buffer_desc_count[num_buffers] = desc_count; 1387 num_buffers += 1; 1388 1389 *nr_descs += desc_count; 1390 avail_idx += desc_count; 1391 if (avail_idx >= vq->size) 1392 avail_idx -= vq->size; 1393 } 1394 1395 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0) 1396 return -1; 1397 1398 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1399 buffer_desc_count, num_buffers); 1400 1401 return 0; 1402 } 1403 1404 static __rte_noinline uint32_t 1405 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1406 struct rte_mbuf **pkts, uint32_t count) 1407 __rte_shared_locks_required(&vq->access_lock) 1408 __rte_shared_locks_required(&vq->iotlb_lock) 1409 { 1410 uint32_t pkt_idx = 0; 1411 uint16_t num_buffers; 1412 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1413 uint16_t avail_head; 1414 1415 /* 1416 * The ordering between avail index and 1417 * desc reads needs to be enforced. 
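The acquire load below pairs with the guest's release store of avail->idx, so every descriptor published before that store is visible once the new index is observed.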
1418 */ 1419 avail_head = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx, 1420 rte_memory_order_acquire); 1421 1422 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1423 1424 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1425 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1426 uint16_t nr_vec = 0; 1427 1428 if (unlikely(reserve_avail_buf_split(dev, vq, 1429 pkt_len, buf_vec, &num_buffers, 1430 avail_head, &nr_vec) < 0)) { 1431 VHOST_LOG_DATA(dev->ifname, DEBUG, 1432 "failed to get enough desc from vring\n"); 1433 vq->shadow_used_idx -= num_buffers; 1434 break; 1435 } 1436 1437 VHOST_LOG_DATA(dev->ifname, DEBUG, 1438 "current index %d | end index %d\n", 1439 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1440 1441 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 1442 num_buffers, false) < 0) { 1443 vq->shadow_used_idx -= num_buffers; 1444 break; 1445 } 1446 1447 vq->last_avail_idx += num_buffers; 1448 } 1449 1450 do_data_copy_enqueue(dev, vq); 1451 1452 if (likely(vq->shadow_used_idx)) { 1453 flush_shadow_used_ring_split(dev, vq); 1454 vhost_vring_call_split(dev, vq); 1455 } 1456 1457 return pkt_idx; 1458 } 1459 1460 static __rte_always_inline int 1461 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1462 struct vhost_virtqueue *vq, 1463 struct rte_mbuf **pkts, 1464 uint64_t *desc_addrs, 1465 uint64_t *lens) 1466 __rte_shared_locks_required(&vq->iotlb_lock) 1467 { 1468 bool wrap_counter = vq->avail_wrap_counter; 1469 struct vring_packed_desc *descs = vq->desc_packed; 1470 uint16_t avail_idx = vq->last_avail_idx; 1471 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1472 uint16_t i; 1473 1474 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1475 return -1; 1476 1477 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1478 return -1; 1479 1480 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1481 if (unlikely(pkts[i]->next != NULL)) 1482 return -1; 1483 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1484 wrap_counter))) 1485 return -1; 1486 } 1487 1488 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1489 lens[i] = descs[avail_idx + i].len; 1490 1491 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1492 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1493 return -1; 1494 } 1495 1496 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1497 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1498 descs[avail_idx + i].addr, 1499 &lens[i], 1500 VHOST_ACCESS_RW); 1501 1502 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1503 if (unlikely(!desc_addrs[i])) 1504 return -1; 1505 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1506 return -1; 1507 } 1508 1509 return 0; 1510 } 1511 1512 static __rte_always_inline int 1513 virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq, 1514 struct rte_mbuf **pkts, 1515 uint64_t *desc_addrs, 1516 uint64_t *lens, 1517 int16_t dma_id, 1518 uint16_t vchan_id) 1519 { 1520 bool wrap_counter = vq->avail_wrap_counter; 1521 struct vring_packed_desc *descs = vq->desc_packed; 1522 uint16_t avail_idx = vq->last_avail_idx; 1523 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1524 uint16_t i; 1525 1526 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1527 return -1; 1528 1529 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1530 return -1; 1531 1532 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1533 if (unlikely(pkts[i]->next != NULL)) 1534 return -1; 1535 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1536 
wrap_counter))) 1537 return -1; 1538 } 1539 1540 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1541 lens[i] = descs[avail_idx + i].len; 1542 1543 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1544 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1545 return -1; 1546 } 1547 1548 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1549 desc_addrs[i] = descs[avail_idx + i].addr; 1550 1551 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1552 if (unlikely(!desc_addrs[i])) 1553 return -1; 1554 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1555 return -1; 1556 } 1557 1558 if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) 1559 return -1; 1560 1561 return 0; 1562 } 1563 1564 static __rte_always_inline void 1565 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1566 struct vhost_virtqueue *vq, 1567 struct rte_mbuf **pkts, 1568 uint64_t *desc_addrs, 1569 uint64_t *lens) 1570 __rte_shared_locks_required(&vq->iotlb_lock) 1571 { 1572 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1573 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1574 struct vring_packed_desc *descs = vq->desc_packed; 1575 uint16_t avail_idx = vq->last_avail_idx; 1576 uint16_t ids[PACKED_BATCH_SIZE]; 1577 uint16_t i; 1578 1579 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1580 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1581 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1582 (uintptr_t)desc_addrs[i]; 1583 lens[i] = pkts[i]->pkt_len + 1584 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1585 } 1586 1587 if (rxvq_is_mergeable(dev)) { 1588 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1589 ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1); 1590 } 1591 } 1592 1593 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1594 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1595 1596 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1597 1598 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1599 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1600 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1601 pkts[i]->pkt_len); 1602 } 1603 1604 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1605 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1606 lens[i]); 1607 1608 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1609 ids[i] = descs[avail_idx + i].id; 1610 1611 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1612 } 1613 1614 static __rte_always_inline int 1615 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1616 struct vhost_virtqueue *vq, 1617 struct rte_mbuf **pkts) 1618 __rte_shared_locks_required(&vq->iotlb_lock) 1619 { 1620 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1621 uint64_t lens[PACKED_BATCH_SIZE]; 1622 1623 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1624 return -1; 1625 1626 if (vq->shadow_used_idx) { 1627 do_data_copy_enqueue(dev, vq); 1628 vhost_flush_enqueue_shadow_packed(dev, vq); 1629 } 1630 1631 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1632 1633 return 0; 1634 } 1635 1636 static __rte_always_inline int16_t 1637 virtio_dev_rx_single_packed(struct virtio_net *dev, 1638 struct vhost_virtqueue *vq, 1639 struct rte_mbuf *pkt) 1640 __rte_shared_locks_required(&vq->access_lock) 1641 __rte_shared_locks_required(&vq->iotlb_lock) 1642 { 1643 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1644 uint16_t nr_descs = 0; 1645 1646 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1647 &nr_descs) < 0)) { 1648 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get 
enough desc from vring\n"); 1649 return -1; 1650 } 1651 1652 VHOST_LOG_DATA(dev->ifname, DEBUG, 1653 "current index %d | end index %d\n", 1654 vq->last_avail_idx, vq->last_avail_idx + nr_descs); 1655 1656 vq_inc_last_avail_packed(vq, nr_descs); 1657 1658 return 0; 1659 } 1660 1661 static __rte_noinline uint32_t 1662 virtio_dev_rx_packed(struct virtio_net *dev, 1663 struct vhost_virtqueue *__rte_restrict vq, 1664 struct rte_mbuf **__rte_restrict pkts, 1665 uint32_t count) 1666 __rte_shared_locks_required(&vq->access_lock) 1667 __rte_shared_locks_required(&vq->iotlb_lock) 1668 { 1669 uint32_t pkt_idx = 0; 1670 1671 do { 1672 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1673 1674 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1675 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1676 &pkts[pkt_idx])) { 1677 pkt_idx += PACKED_BATCH_SIZE; 1678 continue; 1679 } 1680 } 1681 1682 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1683 break; 1684 pkt_idx++; 1685 1686 } while (pkt_idx < count); 1687 1688 if (vq->shadow_used_idx) { 1689 do_data_copy_enqueue(dev, vq); 1690 vhost_flush_enqueue_shadow_packed(dev, vq); 1691 } 1692 1693 if (pkt_idx) 1694 vhost_vring_call_packed(dev, vq); 1695 1696 return pkt_idx; 1697 } 1698 1699 static __rte_always_inline uint32_t 1700 virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, 1701 struct rte_mbuf **pkts, uint32_t count) 1702 { 1703 uint32_t nb_tx = 0; 1704 1705 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 1706 rte_rwlock_read_lock(&vq->access_lock); 1707 1708 if (unlikely(!vq->enabled)) 1709 goto out_access_unlock; 1710 1711 vhost_user_iotlb_rd_lock(vq); 1712 1713 if (unlikely(!vq->access_ok)) 1714 if (unlikely(vring_translate(dev, vq) < 0)) 1715 goto out; 1716 1717 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1718 if (count == 0) 1719 goto out; 1720 1721 if (vq_is_packed(dev)) 1722 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1723 else 1724 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1725 1726 vhost_queue_stats_update(dev, vq, pkts, nb_tx); 1727 1728 out: 1729 vhost_user_iotlb_rd_unlock(vq); 1730 1731 out_access_unlock: 1732 rte_rwlock_read_unlock(&vq->access_lock); 1733 1734 return nb_tx; 1735 } 1736 1737 uint16_t 1738 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1739 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1740 { 1741 struct virtio_net *dev = get_device(vid); 1742 1743 if (!dev) 1744 return 0; 1745 1746 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1747 VHOST_LOG_DATA(dev->ifname, ERR, 1748 "%s: built-in vhost net backend is disabled.\n", 1749 __func__); 1750 return 0; 1751 } 1752 1753 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1754 VHOST_LOG_DATA(dev->ifname, ERR, 1755 "%s: invalid virtqueue idx %d.\n", 1756 __func__, queue_id); 1757 return 0; 1758 } 1759 1760 return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); 1761 } 1762 1763 static __rte_always_inline uint16_t 1764 async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) 1765 __rte_shared_locks_required(&vq->access_lock) 1766 { 1767 struct vhost_async *async = vq->async; 1768 1769 if (async->pkts_idx >= async->pkts_inflight_n) 1770 return async->pkts_idx - async->pkts_inflight_n; 1771 else 1772 return vq->size - async->pkts_inflight_n + async->pkts_idx; 1773 } 1774 1775 static __rte_always_inline void 1776 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1777 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1778 
{ 1779 size_t elem_size = sizeof(struct vring_used_elem); 1780 1781 if (d_idx + count <= ring_size) { 1782 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1783 } else { 1784 uint16_t size = ring_size - d_idx; 1785 1786 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1787 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1788 } 1789 } 1790 1791 static __rte_noinline uint32_t 1792 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1793 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 1794 __rte_exclusive_locks_required(&vq->access_lock) 1795 __rte_shared_locks_required(&vq->iotlb_lock) 1796 { 1797 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1798 uint32_t pkt_idx = 0; 1799 uint16_t num_buffers; 1800 uint16_t avail_head; 1801 1802 struct vhost_async *async = vq->async; 1803 struct async_inflight_info *pkts_info = async->pkts_info; 1804 uint32_t pkt_err = 0; 1805 uint16_t n_xfer; 1806 uint16_t slot_idx = 0; 1807 1808 /* 1809 * The ordering between avail index and desc reads need to be enforced. 1810 */ 1811 avail_head = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx, 1812 rte_memory_order_acquire); 1813 1814 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1815 1816 async_iter_reset(async); 1817 1818 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1819 uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1820 uint16_t nr_vec = 0; 1821 1822 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, 1823 &num_buffers, avail_head, &nr_vec) < 0)) { 1824 VHOST_LOG_DATA(dev->ifname, DEBUG, 1825 "failed to get enough desc from vring\n"); 1826 vq->shadow_used_idx -= num_buffers; 1827 break; 1828 } 1829 1830 VHOST_LOG_DATA(dev->ifname, DEBUG, 1831 "current index %d | end index %d\n", 1832 vq->last_avail_idx, vq->last_avail_idx + num_buffers); 1833 1834 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) { 1835 vq->shadow_used_idx -= num_buffers; 1836 break; 1837 } 1838 1839 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 1840 pkts_info[slot_idx].descs = num_buffers; 1841 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1842 1843 vq->last_avail_idx += num_buffers; 1844 } 1845 1846 if (unlikely(pkt_idx == 0)) 1847 return 0; 1848 1849 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 1850 async->iov_iter, pkt_idx); 1851 1852 pkt_err = pkt_idx - n_xfer; 1853 if (unlikely(pkt_err)) { 1854 uint16_t num_descs = 0; 1855 1856 VHOST_LOG_DATA(dev->ifname, DEBUG, 1857 "%s: failed to transfer %u packets for queue %u.\n", 1858 __func__, pkt_err, vq->index); 1859 1860 /* update number of completed packets */ 1861 pkt_idx = n_xfer; 1862 1863 /* calculate the sum of descriptors to revert */ 1864 while (pkt_err-- > 0) { 1865 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1866 slot_idx--; 1867 } 1868 1869 /* recover shadow used ring and available ring */ 1870 vq->shadow_used_idx -= num_descs; 1871 vq->last_avail_idx -= num_descs; 1872 } 1873 1874 /* keep used descriptors */ 1875 if (likely(vq->shadow_used_idx)) { 1876 uint16_t to = async->desc_idx_split & (vq->size - 1); 1877 1878 store_dma_desc_info_split(vq->shadow_used_split, 1879 async->descs_split, vq->size, 0, to, 1880 vq->shadow_used_idx); 1881 1882 async->desc_idx_split += vq->shadow_used_idx; 1883 1884 async->pkts_idx += pkt_idx; 1885 if (async->pkts_idx >= vq->size) 1886 async->pkts_idx -= vq->size; 1887 1888 
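/* pkts_inflight_n counts packets whose DMA copies have been submitted but not yet reported complete; it is decremented when completions are harvested by the completion-polling path. */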
async->pkts_inflight_n += pkt_idx; 1889 vq->shadow_used_idx = 0; 1890 } 1891 1892 return pkt_idx; 1893 } 1894 1895 1896 static __rte_always_inline int 1897 vhost_enqueue_async_packed(struct virtio_net *dev, 1898 struct vhost_virtqueue *vq, 1899 struct rte_mbuf *pkt, 1900 struct buf_vector *buf_vec, 1901 uint16_t *nr_descs, 1902 uint16_t *nr_buffers) 1903 __rte_exclusive_locks_required(&vq->access_lock) 1904 __rte_shared_locks_required(&vq->iotlb_lock) 1905 { 1906 uint16_t nr_vec = 0; 1907 uint16_t avail_idx = vq->last_avail_idx; 1908 uint16_t max_tries, tries = 0; 1909 uint16_t buf_id = 0; 1910 uint32_t len = 0; 1911 uint16_t desc_count = 0; 1912 uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1913 uint32_t buffer_len[vq->size]; 1914 uint16_t buffer_buf_id[vq->size]; 1915 uint16_t buffer_desc_count[vq->size]; 1916 1917 if (rxvq_is_mergeable(dev)) 1918 max_tries = vq->size - 1; 1919 else 1920 max_tries = 1; 1921 1922 while (size > 0) { 1923 /* 1924 * if we tried all available ring items, and still 1925 * can't get enough buf, it means something abnormal 1926 * happened. 1927 */ 1928 if (unlikely(++tries > max_tries)) 1929 return -1; 1930 1931 if (unlikely(fill_vec_buf_packed(dev, vq, 1932 avail_idx, &desc_count, 1933 buf_vec, &nr_vec, 1934 &buf_id, &len, 1935 VHOST_ACCESS_RW) < 0)) 1936 return -1; 1937 1938 len = RTE_MIN(len, size); 1939 size -= len; 1940 1941 buffer_len[*nr_buffers] = len; 1942 buffer_buf_id[*nr_buffers] = buf_id; 1943 buffer_desc_count[*nr_buffers] = desc_count; 1944 *nr_buffers += 1; 1945 *nr_descs += desc_count; 1946 avail_idx += desc_count; 1947 if (avail_idx >= vq->size) 1948 avail_idx -= vq->size; 1949 } 1950 1951 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0)) 1952 return -1; 1953 1954 vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, 1955 buffer_desc_count, *nr_buffers); 1956 1957 return 0; 1958 } 1959 1960 static __rte_always_inline int16_t 1961 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1962 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers) 1963 __rte_exclusive_locks_required(&vq->access_lock) 1964 __rte_shared_locks_required(&vq->iotlb_lock) 1965 { 1966 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1967 1968 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, 1969 nr_descs, nr_buffers) < 0)) { 1970 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n"); 1971 return -1; 1972 } 1973 1974 VHOST_LOG_DATA(dev->ifname, DEBUG, 1975 "current index %d | end index %d\n", 1976 vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1977 1978 return 0; 1979 } 1980 1981 static __rte_always_inline void 1982 virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev, 1983 struct vhost_virtqueue *vq, 1984 struct rte_mbuf **pkts, 1985 uint64_t *desc_addrs, 1986 uint64_t *lens) 1987 __rte_exclusive_locks_required(&vq->access_lock) 1988 __rte_shared_locks_required(&vq->iotlb_lock) 1989 { 1990 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1991 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1992 struct vring_packed_desc *descs = vq->desc_packed; 1993 struct vhost_async *async = vq->async; 1994 uint16_t avail_idx = vq->last_avail_idx; 1995 uint32_t mbuf_offset = 0; 1996 uint16_t ids[PACKED_BATCH_SIZE]; 1997 uint64_t mapped_len[PACKED_BATCH_SIZE]; 1998 void *host_iova[PACKED_BATCH_SIZE]; 1999 uintptr_t desc; 2000 uint16_t i; 2001 2002 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2003 
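/* translate the descriptor to a host VA so the virtio-net header can be written in place; the payload itself is copied by the DMA engine via the iovecs built below */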
rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 2004 desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], VHOST_ACCESS_RW); 2005 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc; 2006 lens[i] = pkts[i]->pkt_len + 2007 sizeof(struct virtio_net_hdr_mrg_rxbuf); 2008 } 2009 2010 if (rxvq_is_mergeable(dev)) { 2011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2012 ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1); 2013 } 2014 } 2015 2016 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2017 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 2018 2019 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 2020 2021 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2022 host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev, 2023 desc_addrs[i] + buf_offset, lens[i], &mapped_len[i]); 2024 } 2025 2026 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2027 async_iter_initialize(dev, async); 2028 async_iter_add_iovec(dev, async, 2029 (void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset), 2030 host_iova[i], 2031 mapped_len[i]); 2032 async->iter_idx++; 2033 } 2034 2035 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2036 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, lens[i]); 2037 2038 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2039 ids[i] = descs[avail_idx + i].id; 2040 2041 vhost_async_shadow_enqueue_packed_batch(vq, lens, ids); 2042 } 2043 2044 static __rte_always_inline int 2045 virtio_dev_rx_async_packed_batch(struct virtio_net *dev, 2046 struct vhost_virtqueue *vq, 2047 struct rte_mbuf **pkts, 2048 int16_t dma_id, uint16_t vchan_id) 2049 __rte_exclusive_locks_required(&vq->access_lock) 2050 __rte_shared_locks_required(&vq->iotlb_lock) 2051 { 2052 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 2053 uint64_t lens[PACKED_BATCH_SIZE]; 2054 2055 if (virtio_dev_rx_async_batch_check(vq, pkts, desc_addrs, lens, dma_id, vchan_id) == -1) 2056 return -1; 2057 2058 virtio_dev_rx_async_packed_batch_enqueue(dev, vq, pkts, desc_addrs, lens); 2059 2060 return 0; 2061 } 2062 2063 static __rte_always_inline void 2064 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 2065 uint32_t nr_err, uint32_t *pkt_idx) 2066 __rte_exclusive_locks_required(&vq->access_lock) 2067 { 2068 uint16_t descs_err = 0; 2069 uint16_t buffers_err = 0; 2070 struct vhost_async *async = vq->async; 2071 struct async_inflight_info *pkts_info = vq->async->pkts_info; 2072 2073 *pkt_idx -= nr_err; 2074 /* calculate the sum of buffers and descs of DMA-error packets. 
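 * Walk the in-flight slots backwards from the last failed packet so the
 * available index and the async shadow index can be rewound by exactly the
 * descriptors and buffers those packets consumed.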
*/ 2075 while (nr_err-- > 0) { 2076 descs_err += pkts_info[slot_idx % vq->size].descs; 2077 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 2078 slot_idx--; 2079 } 2080 2081 if (vq->last_avail_idx >= descs_err) { 2082 vq->last_avail_idx -= descs_err; 2083 } else { 2084 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 2085 vq->avail_wrap_counter ^= 1; 2086 } 2087 2088 if (async->buffer_idx_packed >= buffers_err) 2089 async->buffer_idx_packed -= buffers_err; 2090 else 2091 async->buffer_idx_packed = async->buffer_idx_packed + vq->size - buffers_err; 2092 } 2093 2094 static __rte_noinline uint32_t 2095 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2096 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2097 __rte_exclusive_locks_required(&vq->access_lock) 2098 __rte_shared_locks_required(&vq->iotlb_lock) 2099 { 2100 uint32_t pkt_idx = 0; 2101 uint16_t n_xfer; 2102 uint16_t num_buffers; 2103 uint16_t num_descs; 2104 2105 struct vhost_async *async = vq->async; 2106 struct async_inflight_info *pkts_info = async->pkts_info; 2107 uint32_t pkt_err = 0; 2108 uint16_t slot_idx = 0; 2109 uint16_t i; 2110 2111 do { 2112 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 2113 2114 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 2115 if (!virtio_dev_rx_async_packed_batch(dev, vq, &pkts[pkt_idx], 2116 dma_id, vchan_id)) { 2117 for (i = 0; i < PACKED_BATCH_SIZE; i++) { 2118 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 2119 pkts_info[slot_idx].descs = 1; 2120 pkts_info[slot_idx].nr_buffers = 1; 2121 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2122 pkt_idx++; 2123 } 2124 continue; 2125 } 2126 } 2127 2128 num_buffers = 0; 2129 num_descs = 0; 2130 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 2131 &num_descs, &num_buffers) < 0)) 2132 break; 2133 2134 slot_idx = (async->pkts_idx + pkt_idx) % vq->size; 2135 2136 pkts_info[slot_idx].descs = num_descs; 2137 pkts_info[slot_idx].nr_buffers = num_buffers; 2138 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2139 2140 pkt_idx++; 2141 vq_inc_last_avail_packed(vq, num_descs); 2142 } while (pkt_idx < count); 2143 2144 if (unlikely(pkt_idx == 0)) 2145 return 0; 2146 2147 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 2148 async->iov_iter, pkt_idx); 2149 2150 async_iter_reset(async); 2151 2152 pkt_err = pkt_idx - n_xfer; 2153 if (unlikely(pkt_err)) { 2154 VHOST_LOG_DATA(dev->ifname, DEBUG, 2155 "%s: failed to transfer %u packets for queue %u.\n", 2156 __func__, pkt_err, vq->index); 2157 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 2158 } 2159 2160 async->pkts_idx += pkt_idx; 2161 if (async->pkts_idx >= vq->size) 2162 async->pkts_idx -= vq->size; 2163 2164 async->pkts_inflight_n += pkt_idx; 2165 2166 return pkt_idx; 2167 } 2168 2169 static __rte_always_inline void 2170 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 2171 __rte_shared_locks_required(&vq->access_lock) 2172 { 2173 struct vhost_async *async = vq->async; 2174 uint16_t nr_left = n_descs; 2175 uint16_t nr_copy; 2176 uint16_t to, from; 2177 2178 do { 2179 from = async->last_desc_idx_split & (vq->size - 1); 2180 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 2181 to = vq->last_used_idx & (vq->size - 1); 2182 2183 if (to + nr_copy <= vq->size) { 2184 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 2185 nr_copy * sizeof(struct vring_used_elem)); 2186 } else { 2187 uint16_t size = vq->size - to; 2188 2189 rte_memcpy(&vq->used->ring[to], &async->descs_split[from], 2190 size * sizeof(struct vring_used_elem)); 2191 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size], 2192 (nr_copy - size) * sizeof(struct vring_used_elem)); 2193 } 2194 2195 async->last_desc_idx_split += nr_copy; 2196 vq->last_used_idx += nr_copy; 2197 nr_left -= nr_copy; 2198 } while (nr_left > 0); 2199 } 2200 2201 static __rte_always_inline void 2202 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 2203 uint16_t n_buffers) 2204 __rte_shared_locks_required(&vq->access_lock) 2205 { 2206 struct vhost_async *async = vq->async; 2207 uint16_t from = async->last_buffer_idx_packed; 2208 uint16_t used_idx = vq->last_used_idx; 2209 uint16_t head_idx = vq->last_used_idx; 2210 uint16_t head_flags = 0; 2211 uint16_t i; 2212 2213 /* Split loop in two to save memory barriers */ 2214 for (i = 0; i < n_buffers; i++) { 2215 vq->desc_packed[used_idx].id = async->buffers_packed[from].id; 2216 vq->desc_packed[used_idx].len = async->buffers_packed[from].len; 2217 2218 used_idx += async->buffers_packed[from].count; 2219 if (used_idx >= vq->size) 2220 used_idx -= vq->size; 2221 2222 from++; 2223 if (from >= vq->size) 2224 from = 0; 2225 } 2226 2227 /* The ordering for storing desc flags needs to be enforced. */ 2228 rte_atomic_thread_fence(rte_memory_order_release); 2229 2230 from = async->last_buffer_idx_packed; 2231 2232 for (i = 0; i < n_buffers; i++) { 2233 uint16_t flags; 2234 2235 if (async->buffers_packed[from].len) 2236 flags = VRING_DESC_F_WRITE; 2237 else 2238 flags = 0; 2239 2240 if (vq->used_wrap_counter) { 2241 flags |= VRING_DESC_F_USED; 2242 flags |= VRING_DESC_F_AVAIL; 2243 } else { 2244 flags &= ~VRING_DESC_F_USED; 2245 flags &= ~VRING_DESC_F_AVAIL; 2246 } 2247 2248 if (i > 0) { 2249 vq->desc_packed[vq->last_used_idx].flags = flags; 2250 } else { 2251 head_idx = vq->last_used_idx; 2252 head_flags = flags; 2253 } 2254 2255 vq_inc_last_used_packed(vq, async->buffers_packed[from].count); 2256 2257 from++; 2258 if (from == vq->size) 2259 from = 0; 2260 } 2261 2262 vq->desc_packed[head_idx].flags = head_flags; 2263 async->last_buffer_idx_packed = from; 2264 } 2265 2266 static __rte_always_inline uint16_t 2267 vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 2268 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) 2269 __rte_shared_locks_required(&vq->access_lock) 2270 { 2271 struct vhost_async *async = vq->async; 2272 struct async_inflight_info *pkts_info = async->pkts_info; 2273 uint16_t nr_cpl_pkts = 0; 2274 uint16_t n_descs = 0, n_buffers = 0; 2275 uint16_t start_idx, from, i; 2276 2277 /* Check completed copies for the given DMA vChannel */ 2278 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 2279 2280 start_idx = async_get_first_inflight_pkt_idx(vq); 2281 /** 2282 * Calculate the number of copy completed packets. 2283 * Note that there may be completed packets even if 2284 * no copies are reported done by the given DMA vChannel, 2285 * as it's possible that a virtqueue uses multiple DMA 2286 * vChannels. 
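 * Completed packets are harvested in submission order from pkts_cmpl_flag[],
 * stopping at the first packet whose copies are still outstanding.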
2287 */ 2288 from = start_idx; 2289 while (vq->async->pkts_cmpl_flag[from] && count--) { 2290 vq->async->pkts_cmpl_flag[from] = false; 2291 from++; 2292 if (from >= vq->size) 2293 from -= vq->size; 2294 nr_cpl_pkts++; 2295 } 2296 2297 if (nr_cpl_pkts == 0) 2298 return 0; 2299 2300 for (i = 0; i < nr_cpl_pkts; i++) { 2301 from = (start_idx + i) % vq->size; 2302 /* Only used with packed ring */ 2303 n_buffers += pkts_info[from].nr_buffers; 2304 /* Only used with split ring */ 2305 n_descs += pkts_info[from].descs; 2306 pkts[i] = pkts_info[from].mbuf; 2307 } 2308 2309 async->pkts_inflight_n -= nr_cpl_pkts; 2310 2311 if (likely(vq->enabled && vq->access_ok)) { 2312 if (vq_is_packed(dev)) { 2313 write_back_completed_descs_packed(vq, n_buffers); 2314 vhost_vring_call_packed(dev, vq); 2315 } else { 2316 write_back_completed_descs_split(vq, n_descs); 2317 rte_atomic_fetch_add_explicit( 2318 (unsigned short __rte_atomic *)&vq->used->idx, 2319 n_descs, rte_memory_order_release); 2320 vhost_vring_call_split(dev, vq); 2321 } 2322 } else { 2323 if (vq_is_packed(dev)) { 2324 async->last_buffer_idx_packed += n_buffers; 2325 if (async->last_buffer_idx_packed >= vq->size) 2326 async->last_buffer_idx_packed -= vq->size; 2327 } else { 2328 async->last_desc_idx_split += n_descs; 2329 } 2330 } 2331 2332 return nr_cpl_pkts; 2333 } 2334 2335 uint16_t 2336 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2337 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2338 uint16_t vchan_id) 2339 { 2340 struct virtio_net *dev = get_device(vid); 2341 struct vhost_virtqueue *vq; 2342 uint16_t n_pkts_cpl = 0; 2343 2344 if (unlikely(!dev)) 2345 return 0; 2346 2347 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2348 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2349 VHOST_LOG_DATA(dev->ifname, ERR, 2350 "%s: invalid virtqueue idx %d.\n", 2351 __func__, queue_id); 2352 return 0; 2353 } 2354 2355 if (unlikely(!dma_copy_track[dma_id].vchans || 2356 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2357 VHOST_LOG_DATA(dev->ifname, ERR, 2358 "%s: invalid channel %d:%u.\n", 2359 __func__, dma_id, vchan_id); 2360 return 0; 2361 } 2362 2363 vq = dev->virtqueue[queue_id]; 2364 2365 if (rte_rwlock_read_trylock(&vq->access_lock)) { 2366 VHOST_LOG_DATA(dev->ifname, DEBUG, 2367 "%s: virtqueue %u is busy.\n", 2368 __func__, queue_id); 2369 return 0; 2370 } 2371 2372 if (unlikely(!vq->async)) { 2373 VHOST_LOG_DATA(dev->ifname, ERR, 2374 "%s: async not registered for virtqueue %d.\n", 2375 __func__, queue_id); 2376 goto out; 2377 } 2378 2379 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); 2380 2381 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2382 vq->stats.inflight_completed += n_pkts_cpl; 2383 2384 out: 2385 rte_rwlock_read_unlock(&vq->access_lock); 2386 2387 return n_pkts_cpl; 2388 } 2389 2390 uint16_t 2391 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2392 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2393 uint16_t vchan_id) 2394 { 2395 struct virtio_net *dev = get_device(vid); 2396 struct vhost_virtqueue *vq; 2397 uint16_t n_pkts_cpl = 0; 2398 2399 if (!dev) 2400 return 0; 2401 2402 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2403 if (unlikely(queue_id >= dev->nr_vring)) { 2404 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n", 2405 __func__, queue_id); 2406 return 0; 2407 } 2408 2409 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2410 VHOST_LOG_DATA(dev->ifname, 
ERR, "%s: invalid dma id %d.\n", 2411 __func__, dma_id); 2412 return 0; 2413 } 2414 2415 vq = dev->virtqueue[queue_id]; 2416 2417 vq_assert_lock(dev, vq); 2418 2419 if (unlikely(!vq->async)) { 2420 VHOST_LOG_DATA(dev->ifname, ERR, 2421 "%s: async not registered for virtqueue %d.\n", 2422 __func__, queue_id); 2423 return 0; 2424 } 2425 2426 if (unlikely(!dma_copy_track[dma_id].vchans || 2427 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2428 VHOST_LOG_DATA(dev->ifname, ERR, 2429 "%s: invalid channel %d:%u.\n", 2430 __func__, dma_id, vchan_id); 2431 return 0; 2432 } 2433 2434 if ((queue_id & 1) == 0) 2435 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2436 dma_id, vchan_id); 2437 else 2438 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2439 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2440 2441 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2442 vq->stats.inflight_completed += n_pkts_cpl; 2443 2444 return n_pkts_cpl; 2445 } 2446 2447 uint16_t 2448 rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, 2449 uint16_t count, int16_t dma_id, uint16_t vchan_id) 2450 { 2451 struct virtio_net *dev = get_device(vid); 2452 struct vhost_virtqueue *vq; 2453 uint16_t n_pkts_cpl = 0; 2454 2455 if (!dev) 2456 return 0; 2457 2458 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2459 if (unlikely(queue_id >= dev->nr_vring)) { 2460 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n", 2461 __func__, queue_id); 2462 return 0; 2463 } 2464 2465 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) { 2466 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n", 2467 __func__, dma_id); 2468 return 0; 2469 } 2470 2471 vq = dev->virtqueue[queue_id]; 2472 2473 if (rte_rwlock_read_trylock(&vq->access_lock)) { 2474 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", 2475 __func__, queue_id); 2476 return 0; 2477 } 2478 2479 if (unlikely(!vq->async)) { 2480 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n", 2481 __func__, queue_id); 2482 goto out_access_unlock; 2483 } 2484 2485 if (unlikely(!dma_copy_track[dma_id].vchans || 2486 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2487 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n", 2488 __func__, dma_id, vchan_id); 2489 goto out_access_unlock; 2490 } 2491 2492 if ((queue_id & 1) == 0) 2493 n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, 2494 dma_id, vchan_id); 2495 else 2496 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, 2497 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); 2498 2499 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); 2500 vq->stats.inflight_completed += n_pkts_cpl; 2501 2502 out_access_unlock: 2503 rte_rwlock_read_unlock(&vq->access_lock); 2504 2505 return n_pkts_cpl; 2506 } 2507 2508 static __rte_always_inline uint32_t 2509 virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, 2510 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) 2511 { 2512 uint32_t nb_tx = 0; 2513 2514 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 2515 2516 if (unlikely(!dma_copy_track[dma_id].vchans || 2517 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { 2518 VHOST_LOG_DATA(dev->ifname, ERR, 2519 "%s: invalid channel %d:%u.\n", 2520 __func__, dma_id, vchan_id); 2521 return 0; 2522 } 2523 2524 rte_rwlock_write_lock(&vq->access_lock); 2525 2526 if (unlikely(!vq->enabled 
|| !vq->async)) 2527 goto out_access_unlock; 2528 2529 vhost_user_iotlb_rd_lock(vq); 2530 2531 if (unlikely(!vq->access_ok)) 2532 if (unlikely(vring_translate(dev, vq) < 0)) 2533 goto out; 2534 2535 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2536 if (count == 0) 2537 goto out; 2538 2539 if (vq_is_packed(dev)) 2540 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, 2541 dma_id, vchan_id); 2542 else 2543 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, 2544 dma_id, vchan_id); 2545 2546 vq->stats.inflight_submitted += nb_tx; 2547 2548 out: 2549 vhost_user_iotlb_rd_unlock(vq); 2550 2551 out_access_unlock: 2552 rte_rwlock_write_unlock(&vq->access_lock); 2553 2554 return nb_tx; 2555 } 2556 2557 uint16_t 2558 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2559 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 2560 uint16_t vchan_id) 2561 { 2562 struct virtio_net *dev = get_device(vid); 2563 2564 if (!dev) 2565 return 0; 2566 2567 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2568 VHOST_LOG_DATA(dev->ifname, ERR, 2569 "%s: built-in vhost net backend is disabled.\n", 2570 __func__); 2571 return 0; 2572 } 2573 2574 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2575 VHOST_LOG_DATA(dev->ifname, ERR, 2576 "%s: invalid virtqueue idx %d.\n", 2577 __func__, queue_id); 2578 return 0; 2579 } 2580 2581 return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, 2582 dma_id, vchan_id); 2583 } 2584 2585 static inline bool 2586 virtio_net_with_host_offload(struct virtio_net *dev) 2587 { 2588 if (dev->features & 2589 ((1ULL << VIRTIO_NET_F_CSUM) | 2590 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2591 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2592 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2593 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2594 return true; 2595 2596 return false; 2597 } 2598 2599 static int 2600 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2601 { 2602 struct rte_ipv4_hdr *ipv4_hdr; 2603 struct rte_ipv6_hdr *ipv6_hdr; 2604 struct rte_ether_hdr *eth_hdr; 2605 uint16_t ethertype; 2606 uint16_t data_len = rte_pktmbuf_data_len(m); 2607 2608 if (data_len < sizeof(struct rte_ether_hdr)) 2609 return -EINVAL; 2610 2611 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2612 2613 m->l2_len = sizeof(struct rte_ether_hdr); 2614 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2615 2616 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2617 if (data_len < sizeof(struct rte_ether_hdr) + 2618 sizeof(struct rte_vlan_hdr)) 2619 goto error; 2620 2621 struct rte_vlan_hdr *vlan_hdr = 2622 (struct rte_vlan_hdr *)(eth_hdr + 1); 2623 2624 m->l2_len += sizeof(struct rte_vlan_hdr); 2625 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2626 } 2627 2628 switch (ethertype) { 2629 case RTE_ETHER_TYPE_IPV4: 2630 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2631 goto error; 2632 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2633 m->l2_len); 2634 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2635 if (data_len < m->l2_len + m->l3_len) 2636 goto error; 2637 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2638 *l4_proto = ipv4_hdr->next_proto_id; 2639 break; 2640 case RTE_ETHER_TYPE_IPV6: 2641 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2642 goto error; 2643 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2644 m->l2_len); 2645 m->l3_len = sizeof(struct rte_ipv6_hdr); 2646 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2647 *l4_proto = ipv6_hdr->proto; 2648 break; 2649 default: 2650 /* a valid L3 header is needed for further L4 
parsing */ 2651 goto error; 2652 } 2653 2654 /* both CSUM and GSO need a valid L4 header */ 2655 switch (*l4_proto) { 2656 case IPPROTO_TCP: 2657 if (data_len < m->l2_len + m->l3_len + 2658 sizeof(struct rte_tcp_hdr)) 2659 goto error; 2660 break; 2661 case IPPROTO_UDP: 2662 if (data_len < m->l2_len + m->l3_len + 2663 sizeof(struct rte_udp_hdr)) 2664 goto error; 2665 break; 2666 case IPPROTO_SCTP: 2667 if (data_len < m->l2_len + m->l3_len + 2668 sizeof(struct rte_sctp_hdr)) 2669 goto error; 2670 break; 2671 default: 2672 goto error; 2673 } 2674 2675 return 0; 2676 2677 error: 2678 m->l2_len = 0; 2679 m->l3_len = 0; 2680 m->ol_flags = 0; 2681 return -EINVAL; 2682 } 2683 2684 static __rte_always_inline void 2685 vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2686 struct rte_mbuf *m) 2687 { 2688 uint8_t l4_proto = 0; 2689 struct rte_tcp_hdr *tcp_hdr = NULL; 2690 uint16_t tcp_len; 2691 uint16_t data_len = rte_pktmbuf_data_len(m); 2692 2693 if (parse_headers(m, &l4_proto) < 0) 2694 return; 2695 2696 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2697 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2698 switch (hdr->csum_offset) { 2699 case (offsetof(struct rte_tcp_hdr, cksum)): 2700 if (l4_proto != IPPROTO_TCP) 2701 goto error; 2702 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2703 break; 2704 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2705 if (l4_proto != IPPROTO_UDP) 2706 goto error; 2707 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2708 break; 2709 case (offsetof(struct rte_sctp_hdr, cksum)): 2710 if (l4_proto != IPPROTO_SCTP) 2711 goto error; 2712 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2713 break; 2714 default: 2715 goto error; 2716 } 2717 } else { 2718 goto error; 2719 } 2720 } 2721 2722 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2723 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2724 case VIRTIO_NET_HDR_GSO_TCPV4: 2725 case VIRTIO_NET_HDR_GSO_TCPV6: 2726 if (l4_proto != IPPROTO_TCP) 2727 goto error; 2728 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2729 struct rte_tcp_hdr *, 2730 m->l2_len + m->l3_len); 2731 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2732 if (data_len < m->l2_len + m->l3_len + tcp_len) 2733 goto error; 2734 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2735 m->tso_segsz = hdr->gso_size; 2736 m->l4_len = tcp_len; 2737 break; 2738 case VIRTIO_NET_HDR_GSO_UDP: 2739 if (l4_proto != IPPROTO_UDP) 2740 goto error; 2741 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2742 m->tso_segsz = hdr->gso_size; 2743 m->l4_len = sizeof(struct rte_udp_hdr); 2744 break; 2745 default: 2746 VHOST_LOG_DATA(dev->ifname, WARNING, 2747 "unsupported gso type %u.\n", 2748 hdr->gso_type); 2749 goto error; 2750 } 2751 } 2752 return; 2753 2754 error: 2755 m->l2_len = 0; 2756 m->l3_len = 0; 2757 m->ol_flags = 0; 2758 } 2759 2760 static __rte_always_inline void 2761 vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, 2762 struct rte_mbuf *m, bool legacy_ol_flags) 2763 { 2764 struct rte_net_hdr_lens hdr_lens; 2765 int l4_supported = 0; 2766 uint32_t ptype; 2767 2768 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2769 return; 2770 2771 if (legacy_ol_flags) { 2772 vhost_dequeue_offload_legacy(dev, hdr, m); 2773 return; 2774 } 2775 2776 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2777 2778 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2779 m->packet_type = ptype; 2780 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2781 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2782 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 
)
2783 l4_supported = 1; 2784 2785 /* According to Virtio 1.1 spec, the device only needs to look at 2786 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2787 * This differs from the processing incoming packets path where the 2788 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2789 * device. 2790 * 2791 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2792 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2793 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2794 * 2795 * 5.1.6.2.2 Device Requirements: Packet Transmission 2796 * The device MUST ignore flag bits that it does not recognize. 2797 */ 2798 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2799 uint32_t hdrlen; 2800 2801 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2802 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2803 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2804 } else { 2805 /* Unknown proto or tunnel, do sw cksum. We can assume 2806 * the cksum field is in the first segment since the 2807 * buffers we provided to the host are large enough. 2808 * In case of SCTP, this will be wrong since it's a CRC 2809 * but there's nothing we can do. 2810 */ 2811 uint16_t csum = 0, off; 2812 2813 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2814 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2815 return; 2816 if (likely(csum != 0xffff)) 2817 csum = ~csum; 2818 off = hdr->csum_offset + hdr->csum_start; 2819 if (rte_pktmbuf_data_len(m) >= off + 1) 2820 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2821 } 2822 } 2823 2824 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2825 if (hdr->gso_size == 0) 2826 return; 2827 2828 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2829 case VIRTIO_NET_HDR_GSO_TCPV4: 2830 case VIRTIO_NET_HDR_GSO_TCPV6: 2831 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2832 break; 2833 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2834 m->tso_segsz = hdr->gso_size; 2835 break; 2836 case VIRTIO_NET_HDR_GSO_UDP: 2837 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2838 break; 2839 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2840 m->tso_segsz = hdr->gso_size; 2841 break; 2842 default: 2843 break; 2844 } 2845 } 2846 } 2847 2848 static __rte_noinline void 2849 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2850 struct buf_vector *buf_vec) 2851 { 2852 uint64_t len; 2853 uint64_t remain = sizeof(struct virtio_net_hdr); 2854 uint64_t src; 2855 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2856 2857 while (remain) { 2858 len = RTE_MIN(remain, buf_vec->buf_len); 2859 src = buf_vec->buf_addr; 2860 rte_memcpy((void *)(uintptr_t)dst, 2861 (void *)(uintptr_t)src, len); 2862 2863 remain -= len; 2864 dst += len; 2865 buf_vec++; 2866 } 2867 } 2868 2869 static __rte_always_inline int 2870 desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2871 struct buf_vector *buf_vec, uint16_t nr_vec, 2872 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2873 bool legacy_ol_flags, uint16_t slot_idx, bool is_async) 2874 __rte_shared_locks_required(&vq->access_lock) 2875 __rte_shared_locks_required(&vq->iotlb_lock) 2876 { 2877 uint32_t buf_avail, buf_offset, buf_len; 2878 uint64_t buf_addr, buf_iova; 2879 uint32_t mbuf_avail, mbuf_offset; 2880 uint32_t hdr_remain = dev->vhost_hlen; 2881 uint32_t cpy_len; 2882 struct rte_mbuf *cur = m, *prev = m; 2883 struct virtio_net_hdr tmp_hdr; 2884 struct virtio_net_hdr *hdr = NULL; 2885 uint16_t vec_idx; 2886 struct vhost_async *async = vq->async; 2887 struct 
async_inflight_info *pkts_info; 2888 2889 /* 2890 * The caller has checked the descriptors chain is larger than the 2891 * header size. 2892 */ 2893 2894 if (virtio_net_with_host_offload(dev)) { 2895 if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { 2896 /* 2897 * No luck, the virtio-net header doesn't fit 2898 * in a contiguous virtual area. 2899 */ 2900 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2901 hdr = &tmp_hdr; 2902 } else { 2903 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); 2904 } 2905 } 2906 2907 for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { 2908 if (buf_vec[vec_idx].buf_len > hdr_remain) 2909 break; 2910 2911 hdr_remain -= buf_vec[vec_idx].buf_len; 2912 } 2913 2914 buf_addr = buf_vec[vec_idx].buf_addr; 2915 buf_iova = buf_vec[vec_idx].buf_iova; 2916 buf_len = buf_vec[vec_idx].buf_len; 2917 buf_offset = hdr_remain; 2918 buf_avail = buf_vec[vec_idx].buf_len - hdr_remain; 2919 2920 PRINT_PACKET(dev, 2921 (uintptr_t)(buf_addr + buf_offset), 2922 (uint32_t)buf_avail, 0); 2923 2924 mbuf_offset = 0; 2925 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2926 2927 if (is_async) { 2928 pkts_info = async->pkts_info; 2929 if (async_iter_initialize(dev, async)) 2930 return -1; 2931 } 2932 2933 while (1) { 2934 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2935 2936 if (is_async) { 2937 if (async_fill_seg(dev, vq, cur, mbuf_offset, 2938 buf_iova + buf_offset, cpy_len, false) < 0) 2939 goto error; 2940 } else if (likely(hdr && cur == m)) { 2941 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), 2942 (void *)((uintptr_t)(buf_addr + buf_offset)), 2943 cpy_len); 2944 } else { 2945 sync_fill_seg(dev, vq, cur, mbuf_offset, 2946 buf_addr + buf_offset, 2947 buf_iova + buf_offset, cpy_len, false); 2948 } 2949 2950 mbuf_avail -= cpy_len; 2951 mbuf_offset += cpy_len; 2952 buf_avail -= cpy_len; 2953 buf_offset += cpy_len; 2954 2955 /* This buf reaches to its end, get the next one */ 2956 if (buf_avail == 0) { 2957 if (++vec_idx >= nr_vec) 2958 break; 2959 2960 buf_addr = buf_vec[vec_idx].buf_addr; 2961 buf_iova = buf_vec[vec_idx].buf_iova; 2962 buf_len = buf_vec[vec_idx].buf_len; 2963 2964 buf_offset = 0; 2965 buf_avail = buf_len; 2966 2967 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2968 (uint32_t)buf_avail, 0); 2969 } 2970 2971 /* 2972 * This mbuf reaches to its end, get a new one 2973 * to hold more data. 
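 * The new mbuf is linked after the previous segment, and the head mbuf's
 * nb_segs and pkt_len are updated as each segment is filled.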
2974 */ 2975 if (mbuf_avail == 0) { 2976 cur = rte_pktmbuf_alloc(mbuf_pool); 2977 if (unlikely(cur == NULL)) { 2978 VHOST_LOG_DATA(dev->ifname, ERR, 2979 "failed to allocate memory for mbuf.\n"); 2980 goto error; 2981 } 2982 2983 prev->next = cur; 2984 prev->data_len = mbuf_offset; 2985 m->nb_segs += 1; 2986 m->pkt_len += mbuf_offset; 2987 prev = cur; 2988 2989 mbuf_offset = 0; 2990 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2991 } 2992 } 2993 2994 prev->data_len = mbuf_offset; 2995 m->pkt_len += mbuf_offset; 2996 2997 if (is_async) { 2998 async_iter_finalize(async); 2999 if (hdr) 3000 pkts_info[slot_idx].nethdr = *hdr; 3001 } else if (hdr) { 3002 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags); 3003 } 3004 3005 return 0; 3006 error: 3007 if (is_async) 3008 async_iter_cancel(async); 3009 3010 return -1; 3011 } 3012 3013 static void 3014 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 3015 { 3016 rte_free(opaque); 3017 } 3018 3019 static int 3020 virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size) 3021 { 3022 struct rte_mbuf_ext_shared_info *shinfo = NULL; 3023 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 3024 uint16_t buf_len; 3025 rte_iova_t iova; 3026 void *buf; 3027 3028 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 3029 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 3030 3031 if (unlikely(total_len > UINT16_MAX)) 3032 return -ENOSPC; 3033 3034 buf_len = total_len; 3035 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 3036 if (unlikely(buf == NULL)) 3037 return -ENOMEM; 3038 3039 /* Initialize shinfo */ 3040 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 3041 virtio_dev_extbuf_free, buf); 3042 if (unlikely(shinfo == NULL)) { 3043 rte_free(buf); 3044 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n"); 3045 return -1; 3046 } 3047 3048 iova = rte_malloc_virt2iova(buf); 3049 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 3050 rte_pktmbuf_reset_headroom(pkt); 3051 3052 return 0; 3053 } 3054 3055 /* 3056 * Prepare a host supported pktmbuf. 3057 */ 3058 static __rte_always_inline int 3059 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 3060 uint32_t data_len) 3061 { 3062 if (rte_pktmbuf_tailroom(pkt) >= data_len) 3063 return 0; 3064 3065 /* attach an external buffer if supported */ 3066 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len)) 3067 return 0; 3068 3069 /* check if chained buffers are allowed */ 3070 if (!dev->linearbuf) 3071 return 0; 3072 3073 return -1; 3074 } 3075 3076 __rte_always_inline 3077 static uint16_t 3078 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3079 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3080 bool legacy_ol_flags) 3081 __rte_shared_locks_required(&vq->access_lock) 3082 __rte_shared_locks_required(&vq->iotlb_lock) 3083 { 3084 uint16_t i; 3085 uint16_t avail_entries; 3086 uint16_t dropped = 0; 3087 static bool allocerr_warned; 3088 3089 /* 3090 * The ordering between avail index and 3091 * desc reads needs to be enforced. 
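 * Loading avail->idx with acquire semantics ensures the descriptor entries
 * written before the guest updated the index are observed here.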
3092 */ 3093 avail_entries = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx, 3094 rte_memory_order_acquire) - vq->last_avail_idx; 3095 if (avail_entries == 0) 3096 return 0; 3097 3098 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3099 3100 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); 3101 3102 count = RTE_MIN(count, MAX_PKT_BURST); 3103 count = RTE_MIN(count, avail_entries); 3104 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3105 3106 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3107 return 0; 3108 3109 for (i = 0; i < count; i++) { 3110 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3111 uint16_t head_idx; 3112 uint32_t buf_len; 3113 uint16_t nr_vec = 0; 3114 int err; 3115 3116 if (unlikely(fill_vec_buf_split(dev, vq, 3117 vq->last_avail_idx + i, 3118 &nr_vec, buf_vec, 3119 &head_idx, &buf_len, 3120 VHOST_ACCESS_RO) < 0)) 3121 break; 3122 3123 update_shadow_used_ring_split(vq, head_idx, 0); 3124 3125 if (unlikely(buf_len <= dev->vhost_hlen)) { 3126 dropped += 1; 3127 i++; 3128 break; 3129 } 3130 3131 buf_len -= dev->vhost_hlen; 3132 3133 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 3134 if (unlikely(err)) { 3135 /* 3136 * mbuf allocation fails for jumbo packets when external 3137 * buffer allocation is not allowed and linear buffer 3138 * is required. Drop this packet. 3139 */ 3140 if (!allocerr_warned) { 3141 VHOST_LOG_DATA(dev->ifname, ERR, 3142 "failed mbuf alloc of size %d from %s.\n", 3143 buf_len, mbuf_pool->name); 3144 allocerr_warned = true; 3145 } 3146 dropped += 1; 3147 i++; 3148 break; 3149 } 3150 3151 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 3152 mbuf_pool, legacy_ol_flags, 0, false); 3153 if (unlikely(err)) { 3154 if (!allocerr_warned) { 3155 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3156 allocerr_warned = true; 3157 } 3158 dropped += 1; 3159 i++; 3160 break; 3161 } 3162 3163 } 3164 3165 if (dropped) 3166 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 3167 3168 vq->last_avail_idx += i; 3169 3170 do_data_copy_dequeue(vq); 3171 if (unlikely(i < count)) 3172 vq->shadow_used_idx = i; 3173 if (likely(vq->shadow_used_idx)) { 3174 flush_shadow_used_ring_split(dev, vq); 3175 vhost_vring_call_split(dev, vq); 3176 } 3177 3178 return (i - dropped); 3179 } 3180 3181 __rte_noinline 3182 static uint16_t 3183 virtio_dev_tx_split_legacy(struct virtio_net *dev, 3184 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3185 struct rte_mbuf **pkts, uint16_t count) 3186 __rte_shared_locks_required(&vq->access_lock) 3187 __rte_shared_locks_required(&vq->iotlb_lock) 3188 { 3189 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 3190 } 3191 3192 __rte_noinline 3193 static uint16_t 3194 virtio_dev_tx_split_compliant(struct virtio_net *dev, 3195 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3196 struct rte_mbuf **pkts, uint16_t count) 3197 __rte_shared_locks_required(&vq->access_lock) 3198 __rte_shared_locks_required(&vq->iotlb_lock) 3199 { 3200 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 3201 } 3202 3203 static __rte_always_inline int 3204 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 3205 struct vhost_virtqueue *vq, 3206 struct rte_mbuf **pkts, 3207 uint16_t avail_idx, 3208 uintptr_t *desc_addrs, 3209 uint16_t *ids) 3210 __rte_shared_locks_required(&vq->iotlb_lock) 3211 { 3212 bool wrap = vq->avail_wrap_counter; 3213 struct vring_packed_desc *descs = vq->desc_packed; 3214 uint64_t 
lens[PACKED_BATCH_SIZE]; 3215 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3216 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3217 uint16_t flags, i; 3218 3219 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3220 return -1; 3221 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3222 return -1; 3223 3224 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3225 flags = descs[avail_idx + i].flags; 3226 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 3227 (wrap == !!(flags & VRING_DESC_F_USED)) || 3228 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3229 return -1; 3230 } 3231 3232 rte_atomic_thread_fence(rte_memory_order_acquire); 3233 3234 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3235 lens[i] = descs[avail_idx + i].len; 3236 3237 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3238 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3239 descs[avail_idx + i].addr, 3240 &lens[i], VHOST_ACCESS_RW); 3241 } 3242 3243 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3244 if (unlikely(!desc_addrs[i])) 3245 return -1; 3246 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3247 return -1; 3248 } 3249 3250 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3251 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3252 goto err; 3253 } 3254 3255 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3256 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3257 3258 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3259 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3260 goto err; 3261 } 3262 3263 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3264 pkts[i]->pkt_len = lens[i] - buf_offset; 3265 pkts[i]->data_len = pkts[i]->pkt_len; 3266 ids[i] = descs[avail_idx + i].id; 3267 } 3268 3269 return 0; 3270 3271 err: 3272 return -1; 3273 } 3274 3275 static __rte_always_inline int 3276 vhost_async_tx_batch_packed_check(struct virtio_net *dev, 3277 struct vhost_virtqueue *vq, 3278 struct rte_mbuf **pkts, 3279 uint16_t avail_idx, 3280 uintptr_t *desc_addrs, 3281 uint64_t *lens, 3282 uint16_t *ids, 3283 int16_t dma_id, 3284 uint16_t vchan_id) 3285 { 3286 bool wrap = vq->avail_wrap_counter; 3287 struct vring_packed_desc *descs = vq->desc_packed; 3288 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3289 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3290 uint16_t flags, i; 3291 3292 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3293 return -1; 3294 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3295 return -1; 3296 3297 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3298 flags = descs[avail_idx + i].flags; 3299 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 3300 (wrap == !!(flags & VRING_DESC_F_USED)) || 3301 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3302 return -1; 3303 } 3304 3305 rte_atomic_thread_fence(rte_memory_order_acquire); 3306 3307 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3308 lens[i] = descs[avail_idx + i].len; 3309 3310 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3311 desc_addrs[i] = descs[avail_idx + i].addr; 3312 } 3313 3314 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3315 if (unlikely(!desc_addrs[i])) 3316 return -1; 3317 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3318 return -1; 3319 } 3320 3321 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3322 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3323 goto err; 3324 } 3325 3326 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3327 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3328 3329 vhost_for_each_try_unroll(i, 0, 
PACKED_BATCH_SIZE) { 3330 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3331 goto err; 3332 } 3333 3334 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3335 pkts[i]->pkt_len = lens[i] - buf_offset; 3336 pkts[i]->data_len = pkts[i]->pkt_len; 3337 ids[i] = descs[avail_idx + i].id; 3338 } 3339 3340 if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE) 3341 return -1; 3342 3343 return 0; 3344 3345 err: 3346 return -1; 3347 } 3348 3349 static __rte_always_inline int 3350 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3351 struct vhost_virtqueue *vq, 3352 struct rte_mbuf **pkts, 3353 bool legacy_ol_flags) 3354 __rte_shared_locks_required(&vq->iotlb_lock) 3355 { 3356 uint16_t avail_idx = vq->last_avail_idx; 3357 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3358 struct virtio_net_hdr *hdr; 3359 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3360 uint16_t ids[PACKED_BATCH_SIZE]; 3361 uint16_t i; 3362 3363 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3364 desc_addrs, ids)) 3365 return -1; 3366 3367 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3368 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3369 3370 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3371 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3372 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3373 pkts[i]->pkt_len); 3374 3375 if (virtio_net_with_host_offload(dev)) { 3376 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3377 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3378 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags); 3379 } 3380 } 3381 3382 if (virtio_net_is_inorder(dev)) 3383 vhost_shadow_dequeue_batch_packed_inorder(vq, 3384 ids[PACKED_BATCH_SIZE - 1]); 3385 else 3386 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3387 3388 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3389 3390 return 0; 3391 } 3392 3393 static __rte_always_inline int 3394 vhost_dequeue_single_packed(struct virtio_net *dev, 3395 struct vhost_virtqueue *vq, 3396 struct rte_mempool *mbuf_pool, 3397 struct rte_mbuf *pkts, 3398 uint16_t *buf_id, 3399 uint16_t *desc_count, 3400 bool legacy_ol_flags) 3401 __rte_shared_locks_required(&vq->access_lock) 3402 __rte_shared_locks_required(&vq->iotlb_lock) 3403 { 3404 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3405 uint32_t buf_len; 3406 uint16_t nr_vec = 0; 3407 int err; 3408 static bool allocerr_warned; 3409 3410 if (unlikely(fill_vec_buf_packed(dev, vq, 3411 vq->last_avail_idx, desc_count, 3412 buf_vec, &nr_vec, 3413 buf_id, &buf_len, 3414 VHOST_ACCESS_RO) < 0)) 3415 return -1; 3416 3417 if (unlikely(buf_len <= dev->vhost_hlen)) 3418 return -1; 3419 3420 buf_len -= dev->vhost_hlen; 3421 3422 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3423 if (!allocerr_warned) { 3424 VHOST_LOG_DATA(dev->ifname, ERR, 3425 "failed mbuf alloc of size %d from %s.\n", 3426 buf_len, mbuf_pool->name); 3427 allocerr_warned = true; 3428 } 3429 return -1; 3430 } 3431 3432 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3433 mbuf_pool, legacy_ol_flags, 0, false); 3434 if (unlikely(err)) { 3435 if (!allocerr_warned) { 3436 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n"); 3437 allocerr_warned = true; 3438 } 3439 return -1; 3440 } 3441 3442 return 0; 3443 } 3444 3445 static __rte_always_inline int 3446 virtio_dev_tx_single_packed(struct virtio_net *dev, 3447 struct vhost_virtqueue *vq, 3448 struct rte_mempool *mbuf_pool, 3449 struct rte_mbuf *pkts, 3450 bool legacy_ol_flags) 3451 
__rte_shared_locks_required(&vq->access_lock) 3452 __rte_shared_locks_required(&vq->iotlb_lock) 3453 { 3454 3455 uint16_t buf_id, desc_count = 0; 3456 int ret; 3457 3458 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3459 &desc_count, legacy_ol_flags); 3460 3461 if (likely(desc_count > 0)) { 3462 if (virtio_net_is_inorder(dev)) 3463 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3464 desc_count); 3465 else 3466 vhost_shadow_dequeue_single_packed(vq, buf_id, 3467 desc_count); 3468 3469 vq_inc_last_avail_packed(vq, desc_count); 3470 } 3471 3472 return ret; 3473 } 3474 3475 __rte_always_inline 3476 static uint16_t 3477 virtio_dev_tx_packed(struct virtio_net *dev, 3478 struct vhost_virtqueue *__rte_restrict vq, 3479 struct rte_mempool *mbuf_pool, 3480 struct rte_mbuf **__rte_restrict pkts, 3481 uint32_t count, 3482 bool legacy_ol_flags) 3483 __rte_shared_locks_required(&vq->access_lock) 3484 __rte_shared_locks_required(&vq->iotlb_lock) 3485 { 3486 uint32_t pkt_idx = 0; 3487 3488 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3489 return 0; 3490 3491 do { 3492 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3493 3494 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3495 if (!virtio_dev_tx_batch_packed(dev, vq, 3496 &pkts[pkt_idx], 3497 legacy_ol_flags)) { 3498 pkt_idx += PACKED_BATCH_SIZE; 3499 continue; 3500 } 3501 } 3502 3503 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3504 pkts[pkt_idx], 3505 legacy_ol_flags)) 3506 break; 3507 pkt_idx++; 3508 } while (pkt_idx < count); 3509 3510 if (pkt_idx != count) 3511 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3512 3513 if (vq->shadow_used_idx) { 3514 do_data_copy_dequeue(vq); 3515 3516 vhost_flush_dequeue_shadow_packed(dev, vq); 3517 vhost_vring_call_packed(dev, vq); 3518 } 3519 3520 return pkt_idx; 3521 } 3522 3523 __rte_noinline 3524 static uint16_t 3525 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3526 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3527 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3528 __rte_shared_locks_required(&vq->access_lock) 3529 __rte_shared_locks_required(&vq->iotlb_lock) 3530 { 3531 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3532 } 3533 3534 __rte_noinline 3535 static uint16_t 3536 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3537 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3538 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3539 __rte_shared_locks_required(&vq->access_lock) 3540 __rte_shared_locks_required(&vq->iotlb_lock) 3541 { 3542 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3543 } 3544 3545 uint16_t 3546 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3547 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3548 { 3549 struct virtio_net *dev; 3550 struct rte_mbuf *rarp_mbuf = NULL; 3551 struct vhost_virtqueue *vq; 3552 int16_t success = 1; 3553 3554 dev = get_device(vid); 3555 if (!dev) 3556 return 0; 3557 3558 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3559 VHOST_LOG_DATA(dev->ifname, ERR, 3560 "%s: built-in vhost net backend is disabled.\n", 3561 __func__); 3562 return 0; 3563 } 3564 3565 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3566 VHOST_LOG_DATA(dev->ifname, ERR, 3567 "%s: invalid virtqueue idx %d.\n", 3568 __func__, queue_id); 3569 return 0; 3570 } 3571 3572 vq = dev->virtqueue[queue_id]; 3573 3574 if 
(unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0)) 3575 return 0; 3576 3577 if (unlikely(!vq->enabled)) { 3578 count = 0; 3579 goto out_access_unlock; 3580 } 3581 3582 vhost_user_iotlb_rd_lock(vq); 3583 3584 if (unlikely(!vq->access_ok)) 3585 if (unlikely(vring_translate(dev, vq) < 0)) { 3586 count = 0; 3587 goto out; 3588 } 3589 3590 /* 3591 * Construct a RARP broadcast packet, and inject it to the "pkts" 3592 * array, to looks like that guest actually send such packet. 3593 * 3594 * Check user_send_rarp() for more information. 3595 * 3596 * broadcast_rarp shares a cacheline in the virtio_net structure 3597 * with some fields that are accessed during enqueue and 3598 * rte_atomic_compare_exchange_strong_explicit causes a write if performed compare 3599 * and exchange. This could result in false sharing between enqueue 3600 * and dequeue. 3601 * 3602 * Prevent unnecessary false sharing by reading broadcast_rarp first 3603 * and only performing compare and exchange if the read indicates it 3604 * is likely to be set. 3605 */ 3606 if (unlikely(rte_atomic_load_explicit(&dev->broadcast_rarp, rte_memory_order_acquire) && 3607 rte_atomic_compare_exchange_strong_explicit(&dev->broadcast_rarp, 3608 &success, 0, rte_memory_order_release, rte_memory_order_relaxed))) { 3609 3610 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 3611 if (rarp_mbuf == NULL) { 3612 VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n"); 3613 count = 0; 3614 goto out; 3615 } 3616 /* 3617 * Inject it to the head of "pkts" array, so that switch's mac 3618 * learning table will get updated first. 3619 */ 3620 pkts[0] = rarp_mbuf; 3621 vhost_queue_stats_update(dev, vq, pkts, 1); 3622 pkts++; 3623 count -= 1; 3624 } 3625 3626 if (vq_is_packed(dev)) { 3627 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3628 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3629 else 3630 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3631 } else { 3632 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3633 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3634 else 3635 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3636 } 3637 3638 vhost_queue_stats_update(dev, vq, pkts, count); 3639 3640 out: 3641 vhost_user_iotlb_rd_unlock(vq); 3642 3643 out_access_unlock: 3644 rte_rwlock_read_unlock(&vq->access_lock); 3645 3646 if (unlikely(rarp_mbuf != NULL)) 3647 count += 1; 3648 3649 return count; 3650 } 3651 3652 static __rte_always_inline uint16_t 3653 async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, 3654 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, 3655 uint16_t vchan_id, bool legacy_ol_flags) 3656 __rte_shared_locks_required(&vq->access_lock) 3657 { 3658 uint16_t start_idx, from, i; 3659 uint16_t nr_cpl_pkts = 0; 3660 struct async_inflight_info *pkts_info = vq->async->pkts_info; 3661 3662 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE); 3663 3664 start_idx = async_get_first_inflight_pkt_idx(vq); 3665 3666 from = start_idx; 3667 while (vq->async->pkts_cmpl_flag[from] && count--) { 3668 vq->async->pkts_cmpl_flag[from] = false; 3669 from = (from + 1) % vq->size; 3670 nr_cpl_pkts++; 3671 } 3672 3673 if (nr_cpl_pkts == 0) 3674 return 0; 3675 3676 for (i = 0; i < nr_cpl_pkts; i++) { 3677 from = (start_idx + i) % vq->size; 3678 pkts[i] = pkts_info[from].mbuf; 3679 3680 if (virtio_net_with_host_offload(dev)) 3681 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, 
pkts[i], 3682 legacy_ol_flags); 3683 } 3684 3685 /* write back completed descs to used ring and update used idx */ 3686 if (vq_is_packed(dev)) { 3687 write_back_completed_descs_packed(vq, nr_cpl_pkts); 3688 vhost_vring_call_packed(dev, vq); 3689 } else { 3690 write_back_completed_descs_split(vq, nr_cpl_pkts); 3691 rte_atomic_fetch_add_explicit((unsigned short __rte_atomic *)&vq->used->idx, 3692 nr_cpl_pkts, rte_memory_order_release); 3693 vhost_vring_call_split(dev, vq); 3694 } 3695 vq->async->pkts_inflight_n -= nr_cpl_pkts; 3696 3697 return nr_cpl_pkts; 3698 } 3699 3700 static __rte_always_inline uint16_t 3701 virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 3702 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 3703 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) 3704 __rte_shared_locks_required(&vq->access_lock) 3705 __rte_shared_locks_required(&vq->iotlb_lock) 3706 { 3707 static bool allocerr_warned; 3708 bool dropped = false; 3709 uint16_t avail_entries; 3710 uint16_t pkt_idx, slot_idx = 0; 3711 uint16_t nr_done_pkts = 0; 3712 uint16_t pkt_err = 0; 3713 uint16_t n_xfer; 3714 struct vhost_async *async = vq->async; 3715 struct async_inflight_info *pkts_info = async->pkts_info; 3716 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST]; 3717 uint16_t pkts_size = count; 3718 3719 /** 3720 * The ordering between avail index and 3721 * desc reads needs to be enforced. 3722 */ 3723 avail_entries = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx, 3724 rte_memory_order_acquire) - vq->last_avail_idx; 3725 if (avail_entries == 0) 3726 goto out; 3727 3728 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 3729 3730 async_iter_reset(async); 3731 3732 count = RTE_MIN(count, MAX_PKT_BURST); 3733 count = RTE_MIN(count, avail_entries); 3734 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count); 3735 3736 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) 3737 goto out; 3738 3739 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 3740 uint16_t head_idx = 0; 3741 uint16_t nr_vec = 0; 3742 uint16_t to; 3743 uint32_t buf_len; 3744 int err; 3745 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3746 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx]; 3747 3748 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx, 3749 &nr_vec, buf_vec, 3750 &head_idx, &buf_len, 3751 VHOST_ACCESS_RO) < 0)) { 3752 dropped = true; 3753 break; 3754 } 3755 3756 if (unlikely(buf_len <= dev->vhost_hlen)) { 3757 dropped = true; 3758 break; 3759 } 3760 3761 buf_len -= dev->vhost_hlen; 3762 3763 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len); 3764 if (unlikely(err)) { 3765 /** 3766 * mbuf allocation fails for jumbo packets when external 3767 * buffer allocation is not allowed and linear buffer 3768 * is required. Drop this packet. 
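 * Only the first such failure is logged, to avoid flooding the log on the
 * data path.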
3769 */ 3770 if (!allocerr_warned) { 3771 VHOST_LOG_DATA(dev->ifname, ERR, 3772 "%s: Failed mbuf alloc of size %d from %s\n", 3773 __func__, buf_len, mbuf_pool->name); 3774 allocerr_warned = true; 3775 } 3776 dropped = true; 3777 slot_idx--; 3778 break; 3779 } 3780 3781 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1); 3782 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool, 3783 legacy_ol_flags, slot_idx, true); 3784 if (unlikely(err)) { 3785 if (!allocerr_warned) { 3786 VHOST_LOG_DATA(dev->ifname, ERR, 3787 "%s: Failed to offload copies to async channel.\n", 3788 __func__); 3789 allocerr_warned = true; 3790 } 3791 dropped = true; 3792 slot_idx--; 3793 break; 3794 } 3795 3796 pkts_info[slot_idx].mbuf = pkt; 3797 3798 /* store used descs */ 3799 to = async->desc_idx_split & (vq->size - 1); 3800 async->descs_split[to].id = head_idx; 3801 async->descs_split[to].len = 0; 3802 async->desc_idx_split++; 3803 3804 vq->last_avail_idx++; 3805 } 3806 3807 if (unlikely(dropped)) 3808 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx); 3809 3810 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx, 3811 async->iov_iter, pkt_idx); 3812 3813 async->pkts_inflight_n += n_xfer; 3814 3815 pkt_err = pkt_idx - n_xfer; 3816 if (unlikely(pkt_err)) { 3817 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n", 3818 __func__); 3819 3820 pkt_idx = n_xfer; 3821 /* recover available ring */ 3822 vq->last_avail_idx -= pkt_err; 3823 3824 /** 3825 * recover async channel copy related structures and free pktmbufs 3826 * for error pkts. 3827 */ 3828 async->desc_idx_split -= pkt_err; 3829 while (pkt_err-- > 0) { 3830 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf); 3831 slot_idx--; 3832 } 3833 } 3834 3835 async->pkts_idx += pkt_idx; 3836 if (async->pkts_idx >= vq->size) 3837 async->pkts_idx -= vq->size; 3838 3839 out: 3840 /* DMA device may serve other queues, unconditionally check completed. 
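 * Copies submitted by earlier calls may have finished in the meantime, so
 * completions are polled even if this call submitted nothing.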
*/ 3841 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size, 3842 dma_id, vchan_id, legacy_ol_flags); 3843 3844 return nr_done_pkts; 3845 } 3846 3847 __rte_noinline 3848 static uint16_t 3849 virtio_dev_tx_async_split_legacy(struct virtio_net *dev, 3850 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3851 struct rte_mbuf **pkts, uint16_t count, 3852 int16_t dma_id, uint16_t vchan_id) 3853 __rte_shared_locks_required(&vq->access_lock) 3854 __rte_shared_locks_required(&vq->iotlb_lock) 3855 { 3856 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3857 pkts, count, dma_id, vchan_id, true); 3858 } 3859 3860 __rte_noinline 3861 static uint16_t 3862 virtio_dev_tx_async_split_compliant(struct virtio_net *dev, 3863 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3864 struct rte_mbuf **pkts, uint16_t count, 3865 int16_t dma_id, uint16_t vchan_id) 3866 __rte_shared_locks_required(&vq->access_lock) 3867 __rte_shared_locks_required(&vq->iotlb_lock) 3868 { 3869 return virtio_dev_tx_async_split(dev, vq, mbuf_pool, 3870 pkts, count, dma_id, vchan_id, false); 3871 } 3872 3873 static __rte_always_inline void 3874 vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 3875 uint16_t buf_id, uint16_t count) 3876 __rte_shared_locks_required(&vq->access_lock) 3877 { 3878 struct vhost_async *async = vq->async; 3879 uint16_t idx = async->buffer_idx_packed; 3880 3881 async->buffers_packed[idx].id = buf_id; 3882 async->buffers_packed[idx].len = 0; 3883 async->buffers_packed[idx].count = count; 3884 3885 async->buffer_idx_packed++; 3886 if (async->buffer_idx_packed >= vq->size) 3887 async->buffer_idx_packed -= vq->size; 3888 3889 } 3890 3891 static __rte_always_inline int 3892 virtio_dev_tx_async_single_packed(struct virtio_net *dev, 3893 struct vhost_virtqueue *vq, 3894 struct rte_mempool *mbuf_pool, 3895 struct rte_mbuf *pkts, 3896 uint16_t slot_idx, 3897 bool legacy_ol_flags) 3898 __rte_shared_locks_required(&vq->access_lock) 3899 __rte_shared_locks_required(&vq->iotlb_lock) 3900 { 3901 int err; 3902 uint16_t buf_id, desc_count = 0; 3903 uint16_t nr_vec = 0; 3904 uint32_t buf_len; 3905 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3906 struct vhost_async *async = vq->async; 3907 struct async_inflight_info *pkts_info = async->pkts_info; 3908 static bool allocerr_warned; 3909 3910 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count, 3911 buf_vec, &nr_vec, &buf_id, &buf_len, 3912 VHOST_ACCESS_RO) < 0)) 3913 return -1; 3914 3915 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3916 if (!allocerr_warned) { 3917 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n", 3918 buf_len, mbuf_pool->name); 3919 3920 allocerr_warned = true; 3921 } 3922 return -1; 3923 } 3924 3925 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 3926 legacy_ol_flags, slot_idx, true); 3927 if (unlikely(err)) { 3928 rte_pktmbuf_free(pkts); 3929 if (!allocerr_warned) { 3930 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf on.\n"); 3931 allocerr_warned = true; 3932 } 3933 return -1; 3934 } 3935 3936 pkts_info[slot_idx].descs = desc_count; 3937 3938 /* update async shadow packed ring */ 3939 vhost_async_shadow_dequeue_single_packed(vq, buf_id, desc_count); 3940 3941 vq_inc_last_avail_packed(vq, desc_count); 3942 3943 return err; 3944 } 3945 3946 static __rte_always_inline int 3947 virtio_dev_tx_async_packed_batch(struct virtio_net *dev, 3948 struct vhost_virtqueue *vq, 3949 struct rte_mbuf **pkts, uint16_t slot_idx, 3950 
static __rte_always_inline int
virtio_dev_tx_async_packed_batch(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			struct rte_mbuf **pkts, uint16_t slot_idx,
			uint16_t dma_id, uint16_t vchan_id)
	__rte_shared_locks_required(&vq->access_lock)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	uint16_t avail_idx = vq->last_avail_idx;
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	struct vhost_async *async = vq->async;
	struct async_inflight_info *pkts_info = async->pkts_info;
	struct virtio_net_hdr *hdr;
	uint32_t mbuf_offset = 0;
	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
	uint64_t desc_vva;
	uint64_t lens[PACKED_BATCH_SIZE];
	void *host_iova[PACKED_BATCH_SIZE];
	uint64_t mapped_len[PACKED_BATCH_SIZE];
	uint16_t ids[PACKED_BATCH_SIZE];
	uint16_t i;

	if (vhost_async_tx_batch_packed_check(dev, vq, pkts, avail_idx,
					      desc_addrs, lens, ids, dma_id, vchan_id))
		return -1;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev,
			desc_addrs[i] + buf_offset, pkts[i]->pkt_len, &mapped_len[i]);
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		async_iter_initialize(dev, async);
		async_iter_add_iovec(dev, async,
			host_iova[i],
			(void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset),
			mapped_len[i]);
		async->iter_idx++;
	}

	if (virtio_net_with_host_offload(dev)) {
		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			desc_vva = vhost_iova_to_vva(dev, vq, desc_addrs[i],
						&lens[i], VHOST_ACCESS_RO);
			hdr = (struct virtio_net_hdr *)(uintptr_t)desc_vva;
			pkts_info[slot_idx + i].nethdr = *hdr;
		}
	}

	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);

	vhost_async_shadow_dequeue_packed_batch(vq, ids);

	return 0;
}

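/*
 * Async dequeue from a packed virtqueue: turn available descriptors into
 * DMA copy jobs, submit them to the (dma_id, vchan_id) channel, and return
 * the packets whose copies have already completed. If only part of the jobs
 * could be submitted, the shadow ring index and last_avail_idx are rolled
 * back and the mbufs of the failed packets are freed, so the corresponding
 * descriptors can be retried on a later call.
 */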
static __rte_always_inline uint16_t
virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
	__rte_shared_locks_required(&vq->access_lock)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	uint32_t pkt_idx = 0;
	uint16_t slot_idx = 0;
	uint16_t nr_done_pkts = 0;
	uint16_t pkt_err = 0;
	uint32_t n_xfer;
	uint16_t i;
	struct vhost_async *async = vq->async;
	struct async_inflight_info *pkts_info = async->pkts_info;
	struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];

	VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count);

	async_iter_reset(async);

	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
		goto out;

	do {
		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];

		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

		slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
			if (!virtio_dev_tx_async_packed_batch(dev, vq, &pkts_prealloc[pkt_idx],
						slot_idx, dma_id, vchan_id)) {
				for (i = 0; i < PACKED_BATCH_SIZE; i++) {
					slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
					pkts_info[slot_idx].descs = 1;
					pkts_info[slot_idx].nr_buffers = 1;
					pkts_info[slot_idx].mbuf = pkts_prealloc[pkt_idx];
					pkt_idx++;
				}
				continue;
			}
		}

		if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt,
				slot_idx, legacy_ol_flags))) {
			rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);

			if (slot_idx == 0)
				slot_idx = vq->size - 1;
			else
				slot_idx--;

			break;
		}

		pkts_info[slot_idx].mbuf = pkt;
		pkt_idx++;
	} while (pkt_idx < count);

	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
					async->iov_iter, pkt_idx);

	async->pkts_inflight_n += n_xfer;

	pkt_err = pkt_idx - n_xfer;

	if (unlikely(pkt_err)) {
		uint16_t descs_err = 0;

		pkt_idx -= pkt_err;

		/**
		 * Recover the DMA-copy related structures and free the
		 * pktmbufs of the packets whose DMA transfer failed.
		 */
		if (async->buffer_idx_packed >= pkt_err)
			async->buffer_idx_packed -= pkt_err;
		else
			async->buffer_idx_packed += vq->size - pkt_err;

		while (pkt_err-- > 0) {
			rte_pktmbuf_free(pkts_info[slot_idx].mbuf);
			descs_err += pkts_info[slot_idx].descs;

			if (slot_idx == 0)
				slot_idx = vq->size - 1;
			else
				slot_idx--;
		}

		/* recover available ring */
		if (vq->last_avail_idx >= descs_err) {
			vq->last_avail_idx -= descs_err;
		} else {
			vq->last_avail_idx += vq->size - descs_err;
			vq->avail_wrap_counter ^= 1;
		}
	}

	async->pkts_idx += pkt_idx;
	if (async->pkts_idx >= vq->size)
		async->pkts_idx -= vq->size;

out:
	nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count,
					dma_id, vchan_id, legacy_ol_flags);

	return nr_done_pkts;
}

__rte_noinline
static uint16_t
virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
	__rte_shared_locks_required(&vq->access_lock)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
			pkts, count, dma_id, vchan_id, true);
}

__rte_noinline
static uint16_t
virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
	__rte_shared_locks_required(&vq->access_lock)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
			pkts, count, dma_id, vchan_id, false);
}

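/*
 * Public entry point: try to receive packets from the guest while offloading
 * the copies to the async DMA channel (dma_id, vchan_id); only packets whose
 * DMA copies have completed are returned.
 *
 * Illustrative usage sketch (not part of this file's build and not the only
 * valid calling pattern): assuming the application has already bound the
 * virtqueue to an async channel (e.g. with rte_vhost_async_channel_register())
 * and configured the DMA vchannel (e.g. with rte_vhost_async_dma_configure()),
 * a polling core would typically do something like:
 *
 *	struct rte_mbuf *pkts[MAX_PKT_BURST];
 *	int nr_inflight;
 *	uint16_t n;
 *
 *	n = rte_vhost_async_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts,
 *			MAX_PKT_BURST, &nr_inflight, dma_id, vchan_id);
 *	// forward the n completed packets, e.g. with rte_eth_tx_burst();
 *	// note that pkts[0] may be an injected RARP packet (see below).
 *
 * "vid", "queue_id", "mbuf_pool", "dma_id" and "vchan_id" are placeholders
 * for values the application already owns; the burst size is the
 * application's choice.
 */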
uint16_t
rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
	int *nr_inflight, int16_t dma_id, uint16_t vchan_id)
{
	struct virtio_net *dev;
	struct rte_mbuf *rarp_mbuf = NULL;
	struct vhost_virtqueue *vq;
	int16_t success = 1;

	dev = get_device(vid);
	if (!dev || !nr_inflight)
		return 0;

	*nr_inflight = -1;

	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n",
			__func__);
		return 0;
	}

	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n",
			__func__, queue_id);
		return 0;
	}

	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
			__func__, dma_id);
		return 0;
	}

	if (unlikely(!dma_copy_track[dma_id].vchans ||
				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
			__func__, dma_id, vchan_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0))
		return 0;

	if (unlikely(vq->enabled == 0)) {
		count = 0;
		goto out_access_unlock;
	}

	if (unlikely(!vq->async)) {
		VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
			__func__, queue_id);
		count = 0;
		goto out_access_unlock;
	}

	vhost_user_iotlb_rd_lock(vq);

	if (unlikely(vq->access_ok == 0))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so that it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * rte_atomic_compare_exchange_strong_explicit() causes a write when
	 * it performs the compare and exchange. This could result in false
	 * sharing between enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing the compare and exchange if the read indicates
	 * it is likely to be set.
	 */
	if (unlikely(rte_atomic_load_explicit(&dev->broadcast_rarp, rte_memory_order_acquire) &&
			rte_atomic_compare_exchange_strong_explicit(&dev->broadcast_rarp,
			&success, 0, rte_memory_order_release, rte_memory_order_relaxed))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it at the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		pkts[0] = rarp_mbuf;
		vhost_queue_stats_update(dev, vq, pkts, 1);
		pkts++;
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
		else
			count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
					pkts, count, dma_id, vchan_id);
	}

	*nr_inflight = vq->async->pkts_inflight_n;
	vhost_queue_stats_update(dev, vq, pkts, count);

out:
	vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_rwlock_read_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL))
		count += 1;

	return count;
}

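/*
 * Teardown note (illustrative sketch, based on the async channel API contract
 * as understood here, not something enforced in this function): since
 * unregistering an async channel is expected to be refused while copies are
 * still in flight, an application would typically keep polling until the
 * reported in-flight counter drains before calling
 * rte_vhost_async_channel_unregister(), e.g.:
 *
 *	do {
 *		n = rte_vhost_async_try_dequeue_burst(vid, queue_id, mbuf_pool,
 *				pkts, MAX_PKT_BURST, &nr_inflight, dma_id, vchan_id);
 *		rte_pktmbuf_free_bulk(pkts, n);
 *	} while (nr_inflight > 0);
 *
 * Error handling (e.g. nr_inflight staying at -1 when the call bails out
 * early) is omitted for brevity.
 */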