/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_net.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_vhost.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_vhost_async.h>

#include "iotlb.h"
#include "vhost.h"

#define MAX_BATCH_LEN 256

#define VHOST_ASYNC_BATCH_THRESHOLD 32

static __rte_always_inline bool
rxvq_is_mergeable(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
}

static __rte_always_inline bool
virtio_net_is_inorder(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
}

static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}

static inline void
do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct batch_copy_elem *elem = vq->batch_copy_elems;
	uint16_t count = vq->batch_copy_nb_elems;
	int i;

	for (i = 0; i < count; i++) {
		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
		vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
					   elem[i].len);
		PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
	}

	vq->batch_copy_nb_elems = 0;
}

static inline void
do_data_copy_dequeue(struct vhost_virtqueue *vq)
{
	struct batch_copy_elem *elem = vq->batch_copy_elems;
	uint16_t count = vq->batch_copy_nb_elems;
	int i;

	for (i = 0; i < count; i++)
		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);

	vq->batch_copy_nb_elems = 0;
}

static __rte_always_inline void
do_flush_shadow_used_ring_split(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			uint16_t to, uint16_t from, uint16_t size)
{
	rte_memcpy(&vq->used->ring[to],
			&vq->shadow_used_split[from],
			size * sizeof(struct vring_used_elem));
	vhost_log_cache_used_vring(dev, vq,
			offsetof(struct vring_used, ring[to]),
			size * sizeof(struct vring_used_elem));
}

static __rte_always_inline void
flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	uint16_t used_idx = vq->last_used_idx & (vq->size - 1);

	if (used_idx + vq->shadow_used_idx <= vq->size) {
		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
					vq->shadow_used_idx);
	} else {
		uint16_t size;

		/* update used ring interval [used_idx, vq->size] */
		size = vq->size - used_idx;
		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);

		/* update the left half used ring interval [0, left_size] */
		do_flush_shadow_used_ring_split(dev, vq, 0, size,
					vq->shadow_used_idx - size);
	}
	vq->last_used_idx += vq->shadow_used_idx;

	vhost_log_cache_sync(dev, vq);

	__atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
			   __ATOMIC_RELEASE);
	vq->shadow_used_idx = 0;
	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
		sizeof(vq->used->idx));
}

static __rte_always_inline void
update_shadow_used_ring_split(struct vhost_virtqueue *vq,
			 uint16_t desc_idx, uint32_t len)
{
	uint16_t i = vq->shadow_used_idx++;

	vq->shadow_used_split[i].id = desc_idx;
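	/* Each shadow entry records the head index of a descriptor chain and
	 * the number of bytes written to it; entries are written back to the
	 * guest-visible used ring in bulk by flush_shadow_used_ring_split().
	 */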
vq->shadow_used_split[i].len = len; 129 } 130 131 static __rte_always_inline void 132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 133 struct vhost_virtqueue *vq) 134 { 135 int i; 136 uint16_t used_idx = vq->last_used_idx; 137 uint16_t head_idx = vq->last_used_idx; 138 uint16_t head_flags = 0; 139 140 /* Split loop in two to save memory barriers */ 141 for (i = 0; i < vq->shadow_used_idx; i++) { 142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 144 145 used_idx += vq->shadow_used_packed[i].count; 146 if (used_idx >= vq->size) 147 used_idx -= vq->size; 148 } 149 150 /* The ordering for storing desc flags needs to be enforced. */ 151 rte_atomic_thread_fence(__ATOMIC_RELEASE); 152 153 for (i = 0; i < vq->shadow_used_idx; i++) { 154 uint16_t flags; 155 156 if (vq->shadow_used_packed[i].len) 157 flags = VRING_DESC_F_WRITE; 158 else 159 flags = 0; 160 161 if (vq->used_wrap_counter) { 162 flags |= VRING_DESC_F_USED; 163 flags |= VRING_DESC_F_AVAIL; 164 } else { 165 flags &= ~VRING_DESC_F_USED; 166 flags &= ~VRING_DESC_F_AVAIL; 167 } 168 169 if (i > 0) { 170 vq->desc_packed[vq->last_used_idx].flags = flags; 171 172 vhost_log_cache_used_vring(dev, vq, 173 vq->last_used_idx * 174 sizeof(struct vring_packed_desc), 175 sizeof(struct vring_packed_desc)); 176 } else { 177 head_idx = vq->last_used_idx; 178 head_flags = flags; 179 } 180 181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 182 } 183 184 vq->desc_packed[head_idx].flags = head_flags; 185 186 vhost_log_cache_used_vring(dev, vq, 187 head_idx * 188 sizeof(struct vring_packed_desc), 189 sizeof(struct vring_packed_desc)); 190 191 vq->shadow_used_idx = 0; 192 vhost_log_cache_sync(dev, vq); 193 } 194 195 static __rte_always_inline void 196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 197 struct vhost_virtqueue *vq) 198 { 199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 200 201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 202 /* desc flags is the synchronization point for virtio packed vring */ 203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 204 used_elem->flags, __ATOMIC_RELEASE); 205 206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 207 sizeof(struct vring_packed_desc), 208 sizeof(struct vring_packed_desc)); 209 vq->shadow_used_idx = 0; 210 vhost_log_cache_sync(dev, vq); 211 } 212 213 static __rte_always_inline void 214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 215 struct vhost_virtqueue *vq, 216 uint64_t *lens, 217 uint16_t *ids) 218 { 219 uint16_t i; 220 uint16_t flags; 221 uint16_t last_used_idx; 222 struct vring_packed_desc *desc_base; 223 224 last_used_idx = vq->last_used_idx; 225 desc_base = &vq->desc_packed[last_used_idx]; 226 227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 228 229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 230 desc_base[i].id = ids[i]; 231 desc_base[i].len = lens[i]; 232 } 233 234 rte_atomic_thread_fence(__ATOMIC_RELEASE); 235 236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 237 desc_base[i].flags = flags; 238 } 239 240 vhost_log_cache_used_vring(dev, vq, last_used_idx * 241 sizeof(struct vring_packed_desc), 242 sizeof(struct vring_packed_desc) * 243 PACKED_BATCH_SIZE); 244 vhost_log_cache_sync(dev, vq); 245 246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 247 } 248 249 static __rte_always_inline void 250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue 
*vq, 251 uint16_t id) 252 { 253 vq->shadow_used_packed[0].id = id; 254 255 if (!vq->shadow_used_idx) { 256 vq->shadow_last_used_idx = vq->last_used_idx; 257 vq->shadow_used_packed[0].flags = 258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 259 vq->shadow_used_packed[0].len = 0; 260 vq->shadow_used_packed[0].count = 1; 261 vq->shadow_used_idx++; 262 } 263 264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 265 } 266 267 static __rte_always_inline void 268 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 269 struct vhost_virtqueue *vq, 270 uint16_t *ids) 271 { 272 uint16_t flags; 273 uint16_t i; 274 uint16_t begin; 275 276 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 277 278 if (!vq->shadow_used_idx) { 279 vq->shadow_last_used_idx = vq->last_used_idx; 280 vq->shadow_used_packed[0].id = ids[0]; 281 vq->shadow_used_packed[0].len = 0; 282 vq->shadow_used_packed[0].count = 1; 283 vq->shadow_used_packed[0].flags = flags; 284 vq->shadow_used_idx++; 285 begin = 1; 286 } else 287 begin = 0; 288 289 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 290 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 291 vq->desc_packed[vq->last_used_idx + i].len = 0; 292 } 293 294 rte_atomic_thread_fence(__ATOMIC_RELEASE); 295 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 296 vq->desc_packed[vq->last_used_idx + i].flags = flags; 297 298 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 299 sizeof(struct vring_packed_desc), 300 sizeof(struct vring_packed_desc) * 301 PACKED_BATCH_SIZE); 302 vhost_log_cache_sync(dev, vq); 303 304 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 305 } 306 307 static __rte_always_inline void 308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 309 uint16_t buf_id, 310 uint16_t count) 311 { 312 uint16_t flags; 313 314 flags = vq->desc_packed[vq->last_used_idx].flags; 315 if (vq->used_wrap_counter) { 316 flags |= VRING_DESC_F_USED; 317 flags |= VRING_DESC_F_AVAIL; 318 } else { 319 flags &= ~VRING_DESC_F_USED; 320 flags &= ~VRING_DESC_F_AVAIL; 321 } 322 323 if (!vq->shadow_used_idx) { 324 vq->shadow_last_used_idx = vq->last_used_idx; 325 326 vq->shadow_used_packed[0].id = buf_id; 327 vq->shadow_used_packed[0].len = 0; 328 vq->shadow_used_packed[0].flags = flags; 329 vq->shadow_used_idx++; 330 } else { 331 vq->desc_packed[vq->last_used_idx].id = buf_id; 332 vq->desc_packed[vq->last_used_idx].len = 0; 333 vq->desc_packed[vq->last_used_idx].flags = flags; 334 } 335 336 vq_inc_last_used_packed(vq, count); 337 } 338 339 static __rte_always_inline void 340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 341 uint16_t buf_id, 342 uint16_t count) 343 { 344 uint16_t flags; 345 346 vq->shadow_used_packed[0].id = buf_id; 347 348 flags = vq->desc_packed[vq->last_used_idx].flags; 349 if (vq->used_wrap_counter) { 350 flags |= VRING_DESC_F_USED; 351 flags |= VRING_DESC_F_AVAIL; 352 } else { 353 flags &= ~VRING_DESC_F_USED; 354 flags &= ~VRING_DESC_F_AVAIL; 355 } 356 357 if (!vq->shadow_used_idx) { 358 vq->shadow_last_used_idx = vq->last_used_idx; 359 vq->shadow_used_packed[0].len = 0; 360 vq->shadow_used_packed[0].flags = flags; 361 vq->shadow_used_idx++; 362 } 363 364 vq_inc_last_used_packed(vq, count); 365 } 366 367 static __rte_always_inline void 368 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 369 uint32_t *len, 370 uint16_t *id, 371 uint16_t *count, 372 uint16_t num_buffers) 373 { 374 uint16_t i; 375 376 for (i = 0; i < num_buffers; i++) { 377 /* enqueue shadow flush action aligned with batch num 
*/ 378 if (!vq->shadow_used_idx) 379 vq->shadow_aligned_idx = vq->last_used_idx & 380 PACKED_BATCH_MASK; 381 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 382 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 383 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 384 vq->shadow_aligned_idx += count[i]; 385 vq->shadow_used_idx++; 386 } 387 } 388 389 static __rte_always_inline void 390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 391 struct vhost_virtqueue *vq, 392 uint32_t *len, 393 uint16_t *id, 394 uint16_t *count, 395 uint16_t num_buffers) 396 { 397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 398 399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { 400 do_data_copy_enqueue(dev, vq); 401 vhost_flush_enqueue_shadow_packed(dev, vq); 402 } 403 } 404 405 /* avoid write operation when necessary, to lessen cache issues */ 406 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 407 if ((var) != (val)) \ 408 (var) = (val); \ 409 } while (0) 410 411 static __rte_always_inline void 412 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 413 { 414 uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; 415 416 if (m_buf->ol_flags & PKT_TX_TCP_SEG) 417 csum_l4 |= PKT_TX_TCP_CKSUM; 418 419 if (csum_l4) { 420 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 421 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 422 423 switch (csum_l4) { 424 case PKT_TX_TCP_CKSUM: 425 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 426 cksum)); 427 break; 428 case PKT_TX_UDP_CKSUM: 429 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 430 dgram_cksum)); 431 break; 432 case PKT_TX_SCTP_CKSUM: 433 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 434 cksum)); 435 break; 436 } 437 } else { 438 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 439 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 440 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 441 } 442 443 /* IP cksum verification cannot be bypassed, then calculate here */ 444 if (m_buf->ol_flags & PKT_TX_IP_CKSUM) { 445 struct rte_ipv4_hdr *ipv4_hdr; 446 447 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 448 m_buf->l2_len); 449 ipv4_hdr->hdr_checksum = 0; 450 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 451 } 452 453 if (m_buf->ol_flags & PKT_TX_TCP_SEG) { 454 if (m_buf->ol_flags & PKT_TX_IPV4) 455 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 456 else 457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 458 net_hdr->gso_size = m_buf->tso_segsz; 459 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 460 + m_buf->l4_len; 461 } else if (m_buf->ol_flags & PKT_TX_UDP_SEG) { 462 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 463 net_hdr->gso_size = m_buf->tso_segsz; 464 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 465 m_buf->l4_len; 466 } else { 467 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 468 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 469 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 470 } 471 } 472 473 static __rte_always_inline int 474 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 475 struct buf_vector *buf_vec, uint16_t *vec_idx, 476 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 477 { 478 uint16_t vec_id = *vec_idx; 479 480 while (desc_len) { 481 uint64_t desc_addr; 482 uint64_t desc_chunck_len = desc_len; 483 484 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 485 return -1; 486 487 desc_addr = vhost_iova_to_vva(dev, vq, 488 desc_iova, 489 &desc_chunck_len, 490 perm); 491 if (unlikely(!desc_addr)) 492 return -1; 493 494 rte_prefetch0((void 
*)(uintptr_t)desc_addr); 495 496 buf_vec[vec_id].buf_iova = desc_iova; 497 buf_vec[vec_id].buf_addr = desc_addr; 498 buf_vec[vec_id].buf_len = desc_chunck_len; 499 500 desc_len -= desc_chunck_len; 501 desc_iova += desc_chunck_len; 502 vec_id++; 503 } 504 *vec_idx = vec_id; 505 506 return 0; 507 } 508 509 static __rte_always_inline int 510 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 511 uint32_t avail_idx, uint16_t *vec_idx, 512 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 513 uint32_t *desc_chain_len, uint8_t perm) 514 { 515 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 516 uint16_t vec_id = *vec_idx; 517 uint32_t len = 0; 518 uint64_t dlen; 519 uint32_t nr_descs = vq->size; 520 uint32_t cnt = 0; 521 struct vring_desc *descs = vq->desc; 522 struct vring_desc *idesc = NULL; 523 524 if (unlikely(idx >= vq->size)) 525 return -1; 526 527 *desc_chain_head = idx; 528 529 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 530 dlen = vq->desc[idx].len; 531 nr_descs = dlen / sizeof(struct vring_desc); 532 if (unlikely(nr_descs > vq->size)) 533 return -1; 534 535 descs = (struct vring_desc *)(uintptr_t) 536 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 537 &dlen, 538 VHOST_ACCESS_RO); 539 if (unlikely(!descs)) 540 return -1; 541 542 if (unlikely(dlen < vq->desc[idx].len)) { 543 /* 544 * The indirect desc table is not contiguous 545 * in process VA space, we have to copy it. 546 */ 547 idesc = vhost_alloc_copy_ind_table(dev, vq, 548 vq->desc[idx].addr, vq->desc[idx].len); 549 if (unlikely(!idesc)) 550 return -1; 551 552 descs = idesc; 553 } 554 555 idx = 0; 556 } 557 558 while (1) { 559 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 560 free_ind_table(idesc); 561 return -1; 562 } 563 564 dlen = descs[idx].len; 565 len += dlen; 566 567 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 568 descs[idx].addr, dlen, 569 perm))) { 570 free_ind_table(idesc); 571 return -1; 572 } 573 574 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 575 break; 576 577 idx = descs[idx].next; 578 } 579 580 *desc_chain_len = len; 581 *vec_idx = vec_id; 582 583 if (unlikely(!!idesc)) 584 free_ind_table(idesc); 585 586 return 0; 587 } 588 589 /* 590 * Returns -1 on fail, 0 on success 591 */ 592 static inline int 593 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 594 uint32_t size, struct buf_vector *buf_vec, 595 uint16_t *num_buffers, uint16_t avail_head, 596 uint16_t *nr_vec) 597 { 598 uint16_t cur_idx; 599 uint16_t vec_idx = 0; 600 uint16_t max_tries, tries = 0; 601 602 uint16_t head_idx = 0; 603 uint32_t len = 0; 604 605 *num_buffers = 0; 606 cur_idx = vq->last_avail_idx; 607 608 if (rxvq_is_mergeable(dev)) 609 max_tries = vq->size - 1; 610 else 611 max_tries = 1; 612 613 while (size > 0) { 614 if (unlikely(cur_idx == avail_head)) 615 return -1; 616 /* 617 * if we tried all available ring items, and still 618 * can't get enough buf, it means something abnormal 619 * happened. 
620 */ 621 if (unlikely(++tries > max_tries)) 622 return -1; 623 624 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 625 &vec_idx, buf_vec, 626 &head_idx, &len, 627 VHOST_ACCESS_RW) < 0)) 628 return -1; 629 len = RTE_MIN(len, size); 630 update_shadow_used_ring_split(vq, head_idx, len); 631 size -= len; 632 633 cur_idx++; 634 *num_buffers += 1; 635 } 636 637 *nr_vec = vec_idx; 638 639 return 0; 640 } 641 642 static __rte_always_inline int 643 fill_vec_buf_packed_indirect(struct virtio_net *dev, 644 struct vhost_virtqueue *vq, 645 struct vring_packed_desc *desc, uint16_t *vec_idx, 646 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 647 { 648 uint16_t i; 649 uint32_t nr_descs; 650 uint16_t vec_id = *vec_idx; 651 uint64_t dlen; 652 struct vring_packed_desc *descs, *idescs = NULL; 653 654 dlen = desc->len; 655 descs = (struct vring_packed_desc *)(uintptr_t) 656 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 657 if (unlikely(!descs)) 658 return -1; 659 660 if (unlikely(dlen < desc->len)) { 661 /* 662 * The indirect desc table is not contiguous 663 * in process VA space, we have to copy it. 664 */ 665 idescs = vhost_alloc_copy_ind_table(dev, 666 vq, desc->addr, desc->len); 667 if (unlikely(!idescs)) 668 return -1; 669 670 descs = idescs; 671 } 672 673 nr_descs = desc->len / sizeof(struct vring_packed_desc); 674 if (unlikely(nr_descs >= vq->size)) { 675 free_ind_table(idescs); 676 return -1; 677 } 678 679 for (i = 0; i < nr_descs; i++) { 680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 681 free_ind_table(idescs); 682 return -1; 683 } 684 685 dlen = descs[i].len; 686 *len += dlen; 687 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 688 descs[i].addr, dlen, 689 perm))) 690 return -1; 691 } 692 *vec_idx = vec_id; 693 694 if (unlikely(!!idescs)) 695 free_ind_table(idescs); 696 697 return 0; 698 } 699 700 static __rte_always_inline int 701 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 702 uint16_t avail_idx, uint16_t *desc_count, 703 struct buf_vector *buf_vec, uint16_t *vec_idx, 704 uint16_t *buf_id, uint32_t *len, uint8_t perm) 705 { 706 bool wrap_counter = vq->avail_wrap_counter; 707 struct vring_packed_desc *descs = vq->desc_packed; 708 uint16_t vec_id = *vec_idx; 709 uint64_t dlen; 710 711 if (avail_idx < vq->last_avail_idx) 712 wrap_counter ^= 1; 713 714 /* 715 * Perform a load-acquire barrier in desc_is_avail to 716 * enforce the ordering between desc flags and desc 717 * content. 
718 */ 719 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 720 return -1; 721 722 *desc_count = 0; 723 *len = 0; 724 725 while (1) { 726 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 727 return -1; 728 729 if (unlikely(*desc_count >= vq->size)) 730 return -1; 731 732 *desc_count += 1; 733 *buf_id = descs[avail_idx].id; 734 735 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 736 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 737 &descs[avail_idx], 738 &vec_id, buf_vec, 739 len, perm) < 0)) 740 return -1; 741 } else { 742 dlen = descs[avail_idx].len; 743 *len += dlen; 744 745 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 746 descs[avail_idx].addr, 747 dlen, 748 perm))) 749 return -1; 750 } 751 752 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 753 break; 754 755 if (++avail_idx >= vq->size) { 756 avail_idx -= vq->size; 757 wrap_counter ^= 1; 758 } 759 } 760 761 *vec_idx = vec_id; 762 763 return 0; 764 } 765 766 static __rte_noinline void 767 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 768 struct buf_vector *buf_vec, 769 struct virtio_net_hdr_mrg_rxbuf *hdr) 770 { 771 uint64_t len; 772 uint64_t remain = dev->vhost_hlen; 773 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 774 uint64_t iova = buf_vec->buf_iova; 775 776 while (remain) { 777 len = RTE_MIN(remain, 778 buf_vec->buf_len); 779 dst = buf_vec->buf_addr; 780 rte_memcpy((void *)(uintptr_t)dst, 781 (void *)(uintptr_t)src, 782 len); 783 784 PRINT_PACKET(dev, (uintptr_t)dst, 785 (uint32_t)len, 0); 786 vhost_log_cache_write_iova(dev, vq, 787 iova, len); 788 789 remain -= len; 790 iova += len; 791 src += len; 792 buf_vec++; 793 } 794 } 795 796 static __rte_always_inline int 797 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 798 struct rte_mbuf *m, struct buf_vector *buf_vec, 799 uint16_t nr_vec, uint16_t num_buffers) 800 { 801 uint32_t vec_idx = 0; 802 uint32_t mbuf_offset, mbuf_avail; 803 uint32_t buf_offset, buf_avail; 804 uint64_t buf_addr, buf_iova, buf_len; 805 uint32_t cpy_len; 806 uint64_t hdr_addr; 807 struct rte_mbuf *hdr_mbuf; 808 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 809 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 810 int error = 0; 811 812 if (unlikely(m == NULL)) { 813 error = -1; 814 goto out; 815 } 816 817 buf_addr = buf_vec[vec_idx].buf_addr; 818 buf_iova = buf_vec[vec_idx].buf_iova; 819 buf_len = buf_vec[vec_idx].buf_len; 820 821 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 822 error = -1; 823 goto out; 824 } 825 826 hdr_mbuf = m; 827 hdr_addr = buf_addr; 828 if (unlikely(buf_len < dev->vhost_hlen)) { 829 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 830 hdr = &tmp_hdr; 831 } else 832 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 833 834 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 835 dev->vid, num_buffers); 836 837 if (unlikely(buf_len < dev->vhost_hlen)) { 838 buf_offset = dev->vhost_hlen - buf_len; 839 vec_idx++; 840 buf_addr = buf_vec[vec_idx].buf_addr; 841 buf_iova = buf_vec[vec_idx].buf_iova; 842 buf_len = buf_vec[vec_idx].buf_len; 843 buf_avail = buf_len - buf_offset; 844 } else { 845 buf_offset = dev->vhost_hlen; 846 buf_avail = buf_len - dev->vhost_hlen; 847 } 848 849 mbuf_avail = rte_pktmbuf_data_len(m); 850 mbuf_offset = 0; 851 while (mbuf_avail != 0 || m->next != NULL) { 852 /* done with current buf, get the next one */ 853 if (buf_avail == 0) { 854 vec_idx++; 855 if (unlikely(vec_idx >= nr_vec)) { 856 error = -1; 857 goto out; 858 } 859 860 
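			/* reload address, IOVA and length of the next
			 * buffer segment in the vector */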
buf_addr = buf_vec[vec_idx].buf_addr; 861 buf_iova = buf_vec[vec_idx].buf_iova; 862 buf_len = buf_vec[vec_idx].buf_len; 863 864 buf_offset = 0; 865 buf_avail = buf_len; 866 } 867 868 /* done with current mbuf, get the next one */ 869 if (mbuf_avail == 0) { 870 m = m->next; 871 872 mbuf_offset = 0; 873 mbuf_avail = rte_pktmbuf_data_len(m); 874 } 875 876 if (hdr_addr) { 877 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 878 if (rxvq_is_mergeable(dev)) 879 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 880 num_buffers); 881 882 if (unlikely(hdr == &tmp_hdr)) { 883 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 884 } else { 885 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 886 dev->vhost_hlen, 0); 887 vhost_log_cache_write_iova(dev, vq, 888 buf_vec[0].buf_iova, 889 dev->vhost_hlen); 890 } 891 892 hdr_addr = 0; 893 } 894 895 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 896 897 if (likely(cpy_len > MAX_BATCH_LEN || 898 vq->batch_copy_nb_elems >= vq->size)) { 899 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), 900 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 901 cpy_len); 902 vhost_log_cache_write_iova(dev, vq, 903 buf_iova + buf_offset, 904 cpy_len); 905 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), 906 cpy_len, 0); 907 } else { 908 batch_copy[vq->batch_copy_nb_elems].dst = 909 (void *)((uintptr_t)(buf_addr + buf_offset)); 910 batch_copy[vq->batch_copy_nb_elems].src = 911 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 912 batch_copy[vq->batch_copy_nb_elems].log_addr = 913 buf_iova + buf_offset; 914 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 915 vq->batch_copy_nb_elems++; 916 } 917 918 mbuf_avail -= cpy_len; 919 mbuf_offset += cpy_len; 920 buf_avail -= cpy_len; 921 buf_offset += cpy_len; 922 } 923 924 out: 925 926 return error; 927 } 928 929 static __rte_always_inline void 930 async_fill_vec(struct iovec *v, void *base, size_t len) 931 { 932 v->iov_base = base; 933 v->iov_len = len; 934 } 935 936 static __rte_always_inline void 937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count, 938 struct iovec *vec, unsigned long nr_seg) 939 { 940 it->offset = 0; 941 it->count = count; 942 943 if (count) { 944 it->iov = vec; 945 it->nr_segs = nr_seg; 946 } else { 947 it->iov = 0; 948 it->nr_segs = 0; 949 } 950 } 951 952 static __rte_always_inline void 953 async_fill_desc(struct rte_vhost_async_desc *desc, 954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst) 955 { 956 desc->src = src; 957 desc->dst = dst; 958 } 959 960 static __rte_always_inline int 961 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 962 struct rte_mbuf *m, struct buf_vector *buf_vec, 963 uint16_t nr_vec, uint16_t num_buffers, 964 struct iovec *src_iovec, struct iovec *dst_iovec, 965 struct rte_vhost_iov_iter *src_it, 966 struct rte_vhost_iov_iter *dst_it) 967 { 968 uint32_t vec_idx = 0; 969 uint32_t mbuf_offset, mbuf_avail; 970 uint32_t buf_offset, buf_avail; 971 uint64_t buf_addr, buf_iova, buf_len; 972 uint32_t cpy_len, cpy_threshold; 973 uint64_t hdr_addr; 974 struct rte_mbuf *hdr_mbuf; 975 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 976 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 977 int error = 0; 978 uint64_t mapped_len; 979 980 uint32_t tlen = 0; 981 int tvec_idx = 0; 982 void *hpa; 983 984 if (unlikely(m == NULL)) { 985 error = -1; 986 goto out; 987 } 988 989 cpy_threshold = vq->async_threshold; 990 991 buf_addr = buf_vec[vec_idx].buf_addr; 992 buf_iova = buf_vec[vec_idx].buf_iova; 993 buf_len = buf_vec[vec_idx].buf_len; 994 995 if (unlikely(buf_len < 
dev->vhost_hlen && nr_vec <= 1)) { 996 error = -1; 997 goto out; 998 } 999 1000 hdr_mbuf = m; 1001 hdr_addr = buf_addr; 1002 if (unlikely(buf_len < dev->vhost_hlen)) { 1003 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1004 hdr = &tmp_hdr; 1005 } else 1006 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1007 1008 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 1009 dev->vid, num_buffers); 1010 1011 if (unlikely(buf_len < dev->vhost_hlen)) { 1012 buf_offset = dev->vhost_hlen - buf_len; 1013 vec_idx++; 1014 buf_addr = buf_vec[vec_idx].buf_addr; 1015 buf_iova = buf_vec[vec_idx].buf_iova; 1016 buf_len = buf_vec[vec_idx].buf_len; 1017 buf_avail = buf_len - buf_offset; 1018 } else { 1019 buf_offset = dev->vhost_hlen; 1020 buf_avail = buf_len - dev->vhost_hlen; 1021 } 1022 1023 mbuf_avail = rte_pktmbuf_data_len(m); 1024 mbuf_offset = 0; 1025 1026 while (mbuf_avail != 0 || m->next != NULL) { 1027 /* done with current buf, get the next one */ 1028 if (buf_avail == 0) { 1029 vec_idx++; 1030 if (unlikely(vec_idx >= nr_vec)) { 1031 error = -1; 1032 goto out; 1033 } 1034 1035 buf_addr = buf_vec[vec_idx].buf_addr; 1036 buf_iova = buf_vec[vec_idx].buf_iova; 1037 buf_len = buf_vec[vec_idx].buf_len; 1038 1039 buf_offset = 0; 1040 buf_avail = buf_len; 1041 } 1042 1043 /* done with current mbuf, get the next one */ 1044 if (mbuf_avail == 0) { 1045 m = m->next; 1046 1047 mbuf_offset = 0; 1048 mbuf_avail = rte_pktmbuf_data_len(m); 1049 } 1050 1051 if (hdr_addr) { 1052 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1053 if (rxvq_is_mergeable(dev)) 1054 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1055 num_buffers); 1056 1057 if (unlikely(hdr == &tmp_hdr)) { 1058 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1059 } else { 1060 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1061 dev->vhost_hlen, 0); 1062 vhost_log_cache_write_iova(dev, vq, 1063 buf_vec[0].buf_iova, 1064 dev->vhost_hlen); 1065 } 1066 1067 hdr_addr = 0; 1068 } 1069 1070 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1071 1072 while (unlikely(cpy_len && cpy_len >= cpy_threshold)) { 1073 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1074 buf_iova + buf_offset, 1075 cpy_len, &mapped_len); 1076 1077 if (unlikely(!hpa || mapped_len < cpy_threshold)) 1078 break; 1079 1080 async_fill_vec(src_iovec + tvec_idx, 1081 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 1082 mbuf_offset), (size_t)mapped_len); 1083 1084 async_fill_vec(dst_iovec + tvec_idx, 1085 hpa, (size_t)mapped_len); 1086 1087 tlen += (uint32_t)mapped_len; 1088 cpy_len -= (uint32_t)mapped_len; 1089 mbuf_avail -= (uint32_t)mapped_len; 1090 mbuf_offset += (uint32_t)mapped_len; 1091 buf_avail -= (uint32_t)mapped_len; 1092 buf_offset += (uint32_t)mapped_len; 1093 tvec_idx++; 1094 } 1095 1096 if (likely(cpy_len)) { 1097 if (unlikely(vq->batch_copy_nb_elems >= vq->size)) { 1098 rte_memcpy( 1099 (void *)((uintptr_t)(buf_addr + buf_offset)), 1100 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1101 cpy_len); 1102 1103 PRINT_PACKET(dev, 1104 (uintptr_t)(buf_addr + buf_offset), 1105 cpy_len, 0); 1106 } else { 1107 batch_copy[vq->batch_copy_nb_elems].dst = 1108 (void *)((uintptr_t)(buf_addr + buf_offset)); 1109 batch_copy[vq->batch_copy_nb_elems].src = 1110 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1111 batch_copy[vq->batch_copy_nb_elems].log_addr = 1112 buf_iova + buf_offset; 1113 batch_copy[vq->batch_copy_nb_elems].len = 1114 cpy_len; 1115 vq->batch_copy_nb_elems++; 1116 } 1117 1118 mbuf_avail -= cpy_len; 1119 mbuf_offset += cpy_len; 1120 buf_avail -= cpy_len; 1121 buf_offset += 
cpy_len; 1122 } 1123 1124 } 1125 1126 out: 1127 if (tlen) { 1128 async_fill_iter(src_it, tlen, src_iovec, tvec_idx); 1129 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx); 1130 } else { 1131 src_it->count = 0; 1132 } 1133 1134 return error; 1135 } 1136 1137 static __rte_always_inline int 1138 vhost_enqueue_single_packed(struct virtio_net *dev, 1139 struct vhost_virtqueue *vq, 1140 struct rte_mbuf *pkt, 1141 struct buf_vector *buf_vec, 1142 uint16_t *nr_descs) 1143 { 1144 uint16_t nr_vec = 0; 1145 uint16_t avail_idx = vq->last_avail_idx; 1146 uint16_t max_tries, tries = 0; 1147 uint16_t buf_id = 0; 1148 uint32_t len = 0; 1149 uint16_t desc_count; 1150 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1151 uint16_t num_buffers = 0; 1152 uint32_t buffer_len[vq->size]; 1153 uint16_t buffer_buf_id[vq->size]; 1154 uint16_t buffer_desc_count[vq->size]; 1155 1156 if (rxvq_is_mergeable(dev)) 1157 max_tries = vq->size - 1; 1158 else 1159 max_tries = 1; 1160 1161 while (size > 0) { 1162 /* 1163 * if we tried all available ring items, and still 1164 * can't get enough buf, it means something abnormal 1165 * happened. 1166 */ 1167 if (unlikely(++tries > max_tries)) 1168 return -1; 1169 1170 if (unlikely(fill_vec_buf_packed(dev, vq, 1171 avail_idx, &desc_count, 1172 buf_vec, &nr_vec, 1173 &buf_id, &len, 1174 VHOST_ACCESS_RW) < 0)) 1175 return -1; 1176 1177 len = RTE_MIN(len, size); 1178 size -= len; 1179 1180 buffer_len[num_buffers] = len; 1181 buffer_buf_id[num_buffers] = buf_id; 1182 buffer_desc_count[num_buffers] = desc_count; 1183 num_buffers += 1; 1184 1185 *nr_descs += desc_count; 1186 avail_idx += desc_count; 1187 if (avail_idx >= vq->size) 1188 avail_idx -= vq->size; 1189 } 1190 1191 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0) 1192 return -1; 1193 1194 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1195 buffer_desc_count, num_buffers); 1196 1197 return 0; 1198 } 1199 1200 static __rte_noinline uint32_t 1201 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1202 struct rte_mbuf **pkts, uint32_t count) 1203 { 1204 uint32_t pkt_idx = 0; 1205 uint16_t num_buffers; 1206 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1207 uint16_t avail_head; 1208 1209 /* 1210 * The ordering between avail index and 1211 * desc reads needs to be enforced. 
1212 */ 1213 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1214 1215 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1216 1217 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1218 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1219 uint16_t nr_vec = 0; 1220 1221 if (unlikely(reserve_avail_buf_split(dev, vq, 1222 pkt_len, buf_vec, &num_buffers, 1223 avail_head, &nr_vec) < 0)) { 1224 VHOST_LOG_DATA(DEBUG, 1225 "(%d) failed to get enough desc from vring\n", 1226 dev->vid); 1227 vq->shadow_used_idx -= num_buffers; 1228 break; 1229 } 1230 1231 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1232 dev->vid, vq->last_avail_idx, 1233 vq->last_avail_idx + num_buffers); 1234 1235 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], 1236 buf_vec, nr_vec, 1237 num_buffers) < 0) { 1238 vq->shadow_used_idx -= num_buffers; 1239 break; 1240 } 1241 1242 vq->last_avail_idx += num_buffers; 1243 } 1244 1245 do_data_copy_enqueue(dev, vq); 1246 1247 if (likely(vq->shadow_used_idx)) { 1248 flush_shadow_used_ring_split(dev, vq); 1249 vhost_vring_call_split(dev, vq); 1250 } 1251 1252 return pkt_idx; 1253 } 1254 1255 static __rte_always_inline int 1256 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1257 struct vhost_virtqueue *vq, 1258 struct rte_mbuf **pkts, 1259 uint64_t *desc_addrs, 1260 uint64_t *lens) 1261 { 1262 bool wrap_counter = vq->avail_wrap_counter; 1263 struct vring_packed_desc *descs = vq->desc_packed; 1264 uint16_t avail_idx = vq->last_avail_idx; 1265 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1266 uint16_t i; 1267 1268 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1269 return -1; 1270 1271 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1272 return -1; 1273 1274 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1275 if (unlikely(pkts[i]->next != NULL)) 1276 return -1; 1277 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1278 wrap_counter))) 1279 return -1; 1280 } 1281 1282 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1283 lens[i] = descs[avail_idx + i].len; 1284 1285 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1286 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1287 return -1; 1288 } 1289 1290 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1291 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1292 descs[avail_idx + i].addr, 1293 &lens[i], 1294 VHOST_ACCESS_RW); 1295 1296 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1297 if (unlikely(!desc_addrs[i])) 1298 return -1; 1299 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1300 return -1; 1301 } 1302 1303 return 0; 1304 } 1305 1306 static __rte_always_inline int 1307 virtio_dev_rx_async_batch_check(struct virtio_net *dev, 1308 struct vhost_virtqueue *vq, 1309 struct rte_mbuf **pkts, 1310 uint64_t *desc_addrs, 1311 uint64_t *lens) 1312 { 1313 bool wrap_counter = vq->avail_wrap_counter; 1314 struct vring_packed_desc *descs = vq->desc_packed; 1315 uint16_t avail_idx = vq->last_avail_idx; 1316 uint16_t used_idx = vq->last_used_idx; 1317 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1318 uint32_t cpy_threshold = vq->async_threshold; 1319 uint16_t i; 1320 1321 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1322 if (unlikely(pkts[i]->data_len >= cpy_threshold)) 1323 return -1; 1324 } 1325 1326 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1327 return -1; 1328 1329 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1330 return -1; 1331 1332 if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size)) 
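			/* the used-ring write-back for this batch must also
			 * fit before the ring end */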
1333 return -1; 1334 1335 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1336 if (unlikely(pkts[i]->next != NULL)) 1337 return -1; 1338 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1339 wrap_counter))) 1340 return -1; 1341 } 1342 1343 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1344 lens[i] = descs[avail_idx + i].len; 1345 1346 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1347 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1348 return -1; 1349 } 1350 1351 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1352 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1353 descs[avail_idx + i].addr, 1354 &lens[i], 1355 VHOST_ACCESS_RW); 1356 1357 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1358 if (unlikely(!desc_addrs[i])) 1359 return -1; 1360 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1361 return -1; 1362 } 1363 1364 return 0; 1365 } 1366 1367 static __rte_always_inline void 1368 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1369 struct vhost_virtqueue *vq, 1370 struct rte_mbuf **pkts, 1371 uint64_t *desc_addrs, 1372 uint64_t *lens) 1373 { 1374 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1375 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1376 struct vring_packed_desc *descs = vq->desc_packed; 1377 uint16_t avail_idx = vq->last_avail_idx; 1378 uint16_t ids[PACKED_BATCH_SIZE]; 1379 uint16_t i; 1380 1381 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1382 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1383 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1384 (uintptr_t)desc_addrs[i]; 1385 lens[i] = pkts[i]->pkt_len + 1386 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1387 } 1388 1389 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1390 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1391 1392 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1393 1394 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1395 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1396 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1397 pkts[i]->pkt_len); 1398 } 1399 1400 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1401 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1402 lens[i]); 1403 1404 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1405 ids[i] = descs[avail_idx + i].id; 1406 1407 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1408 } 1409 1410 static __rte_always_inline int 1411 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1412 struct vhost_virtqueue *vq, 1413 struct rte_mbuf **pkts) 1414 { 1415 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1416 uint64_t lens[PACKED_BATCH_SIZE]; 1417 1418 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1419 return -1; 1420 1421 if (vq->shadow_used_idx) { 1422 do_data_copy_enqueue(dev, vq); 1423 vhost_flush_enqueue_shadow_packed(dev, vq); 1424 } 1425 1426 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1427 1428 return 0; 1429 } 1430 1431 static __rte_always_inline int 1432 virtio_dev_rx_async_batch_packed(struct virtio_net *dev, 1433 struct vhost_virtqueue *vq, 1434 struct rte_mbuf **pkts, 1435 struct rte_mbuf **comp_pkts, uint32_t *pkt_done) 1436 { 1437 uint16_t i; 1438 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1439 uint64_t lens[PACKED_BATCH_SIZE]; 1440 1441 if (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1442 return -1; 1443 1444 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1445 1446 if (vq->shadow_used_idx) { 1447 do_data_copy_enqueue(dev, vq); 1448 
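		/* descriptors still held in the shadow ring are flushed to
		 * the guest-visible ring before this batch is reported as
		 * completed through comp_pkts */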
vhost_flush_enqueue_shadow_packed(dev, vq); 1449 } 1450 1451 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1452 comp_pkts[(*pkt_done)++] = pkts[i]; 1453 1454 return 0; 1455 } 1456 1457 static __rte_always_inline int16_t 1458 virtio_dev_rx_single_packed(struct virtio_net *dev, 1459 struct vhost_virtqueue *vq, 1460 struct rte_mbuf *pkt) 1461 { 1462 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1463 uint16_t nr_descs = 0; 1464 1465 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1466 &nr_descs) < 0)) { 1467 VHOST_LOG_DATA(DEBUG, 1468 "(%d) failed to get enough desc from vring\n", 1469 dev->vid); 1470 return -1; 1471 } 1472 1473 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1474 dev->vid, vq->last_avail_idx, 1475 vq->last_avail_idx + nr_descs); 1476 1477 vq_inc_last_avail_packed(vq, nr_descs); 1478 1479 return 0; 1480 } 1481 1482 static __rte_noinline uint32_t 1483 virtio_dev_rx_packed(struct virtio_net *dev, 1484 struct vhost_virtqueue *__rte_restrict vq, 1485 struct rte_mbuf **__rte_restrict pkts, 1486 uint32_t count) 1487 { 1488 uint32_t pkt_idx = 0; 1489 1490 do { 1491 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1492 1493 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1494 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1495 &pkts[pkt_idx])) { 1496 pkt_idx += PACKED_BATCH_SIZE; 1497 continue; 1498 } 1499 } 1500 1501 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1502 break; 1503 pkt_idx++; 1504 1505 } while (pkt_idx < count); 1506 1507 if (vq->shadow_used_idx) { 1508 do_data_copy_enqueue(dev, vq); 1509 vhost_flush_enqueue_shadow_packed(dev, vq); 1510 } 1511 1512 if (pkt_idx) 1513 vhost_vring_call_packed(dev, vq); 1514 1515 return pkt_idx; 1516 } 1517 1518 static __rte_always_inline uint32_t 1519 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1520 struct rte_mbuf **pkts, uint32_t count) 1521 { 1522 struct vhost_virtqueue *vq; 1523 uint32_t nb_tx = 0; 1524 1525 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 1526 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1527 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 1528 dev->vid, __func__, queue_id); 1529 return 0; 1530 } 1531 1532 vq = dev->virtqueue[queue_id]; 1533 1534 rte_spinlock_lock(&vq->access_lock); 1535 1536 if (unlikely(!vq->enabled)) 1537 goto out_access_unlock; 1538 1539 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1540 vhost_user_iotlb_rd_lock(vq); 1541 1542 if (unlikely(!vq->access_ok)) 1543 if (unlikely(vring_translate(dev, vq) < 0)) 1544 goto out; 1545 1546 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1547 if (count == 0) 1548 goto out; 1549 1550 if (vq_is_packed(dev)) 1551 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1552 else 1553 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1554 1555 out: 1556 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1557 vhost_user_iotlb_rd_unlock(vq); 1558 1559 out_access_unlock: 1560 rte_spinlock_unlock(&vq->access_lock); 1561 1562 return nb_tx; 1563 } 1564 1565 uint16_t 1566 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1567 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1568 { 1569 struct virtio_net *dev = get_device(vid); 1570 1571 if (!dev) 1572 return 0; 1573 1574 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1575 VHOST_LOG_DATA(ERR, 1576 "(%d) %s: built-in vhost net backend is disabled.\n", 1577 dev->vid, __func__); 1578 return 0; 1579 } 1580 1581 return virtio_dev_rx(dev, queue_id, pkts, count); 1582 } 1583 1584 static 
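/*
 * Illustrative caller-side sketch, not part of the original file: a
 * hypothetical application thread that owns a valid vid and a burst of
 * mbufs in pkts[] can drive the synchronous enqueue path above with:
 *
 *	uint16_t sent = rte_vhost_enqueue_burst(vid, queue_id, pkts, nb_pkts);
 *	// Data is copied into guest buffers on the calling core, so every
 *	// mbuf may be freed here; packets beyond `sent` were not delivered
 *	// because no descriptors were available.
 *	rte_pktmbuf_free_bulk(pkts, nb_pkts);
 *
 * The async submit paths below instead return CPU-copied packets through
 * comp_pkts and leave DMA-offloaded copies to be reclaimed later via
 * rte_vhost_poll_enqueue_completed().
 */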
__rte_always_inline uint16_t 1585 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx, 1586 uint16_t vq_size, uint16_t n_inflight) 1587 { 1588 return pkts_idx > n_inflight ? (pkts_idx - n_inflight) : 1589 (vq_size - n_inflight + pkts_idx) % vq_size; 1590 } 1591 1592 static __rte_always_inline void 1593 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1594 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1595 { 1596 size_t elem_size = sizeof(struct vring_used_elem); 1597 1598 if (d_idx + count <= ring_size) { 1599 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1600 } else { 1601 uint16_t size = ring_size - d_idx; 1602 1603 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1604 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1605 } 1606 } 1607 1608 static __rte_always_inline void 1609 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1610 struct vring_used_elem_packed *d_ring, 1611 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1612 { 1613 size_t elem_size = sizeof(struct vring_used_elem_packed); 1614 1615 if (d_idx + count <= ring_size) { 1616 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1617 } else { 1618 uint16_t size = ring_size - d_idx; 1619 1620 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1621 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1622 } 1623 } 1624 1625 static __rte_noinline uint32_t 1626 virtio_dev_rx_async_submit_split(struct virtio_net *dev, 1627 struct vhost_virtqueue *vq, uint16_t queue_id, 1628 struct rte_mbuf **pkts, uint32_t count, 1629 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 1630 { 1631 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1632 uint16_t num_buffers; 1633 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1634 uint16_t avail_head; 1635 1636 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1637 struct iovec *vec_pool = vq->vec_pool; 1638 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1639 struct iovec *src_iovec = vec_pool; 1640 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1641 uint16_t slot_idx = 0; 1642 uint16_t segs_await = 0; 1643 uint16_t iovec_idx = 0, it_idx = 0; 1644 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1645 uint32_t n_pkts = 0, pkt_err = 0; 1646 uint32_t num_async_pkts = 0, num_done_pkts = 0; 1647 struct { 1648 uint16_t pkt_idx; 1649 uint16_t last_avail_idx; 1650 } async_pkts_log[MAX_PKT_BURST]; 1651 1652 /* 1653 * The ordering between avail index and desc reads need to be enforced. 
1654 */ 1655 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1656 1657 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1658 1659 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1660 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1661 uint16_t nr_vec = 0; 1662 1663 if (unlikely(reserve_avail_buf_split(dev, vq, 1664 pkt_len, buf_vec, &num_buffers, 1665 avail_head, &nr_vec) < 0)) { 1666 VHOST_LOG_DATA(DEBUG, 1667 "(%d) failed to get enough desc from vring\n", 1668 dev->vid); 1669 vq->shadow_used_idx -= num_buffers; 1670 break; 1671 } 1672 1673 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1674 dev->vid, vq->last_avail_idx, 1675 vq->last_avail_idx + num_buffers); 1676 1677 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, 1678 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1679 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) { 1680 vq->shadow_used_idx -= num_buffers; 1681 break; 1682 } 1683 1684 slot_idx = (vq->async_pkts_idx + num_async_pkts) & 1685 (vq->size - 1); 1686 if (it_pool[it_idx].count) { 1687 uint16_t from, to; 1688 1689 async_fill_desc(&tdes[pkt_burst_idx++], 1690 &it_pool[it_idx], &it_pool[it_idx + 1]); 1691 pkts_info[slot_idx].descs = num_buffers; 1692 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1693 async_pkts_log[num_async_pkts].pkt_idx = pkt_idx; 1694 async_pkts_log[num_async_pkts++].last_avail_idx = 1695 vq->last_avail_idx; 1696 1697 iovec_idx += it_pool[it_idx].nr_segs; 1698 it_idx += 2; 1699 1700 segs_await += it_pool[it_idx].nr_segs; 1701 1702 /** 1703 * recover shadow used ring and keep DMA-occupied 1704 * descriptors. 1705 */ 1706 from = vq->shadow_used_idx - num_buffers; 1707 to = vq->async_desc_idx_split & (vq->size - 1); 1708 1709 store_dma_desc_info_split(vq->shadow_used_split, 1710 vq->async_descs_split, vq->size, from, to, num_buffers); 1711 1712 vq->async_desc_idx_split += num_buffers; 1713 vq->shadow_used_idx -= num_buffers; 1714 } else 1715 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 1716 1717 vq->last_avail_idx += num_buffers; 1718 1719 /* 1720 * conditions to trigger async device transfer: 1721 * - buffered packet number reaches transfer threshold 1722 * - unused async iov number is less than max vhost vector 1723 */ 1724 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1725 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < 1726 BUF_VECTOR_MAX))) { 1727 n_pkts = vq->async_ops.transfer_data(dev->vid, 1728 queue_id, tdes, 0, pkt_burst_idx); 1729 iovec_idx = 0; 1730 it_idx = 0; 1731 1732 segs_await = 0; 1733 vq->async_pkts_inflight_n += n_pkts; 1734 1735 if (unlikely(n_pkts < pkt_burst_idx)) { 1736 /* 1737 * log error packets number here and do actual 1738 * error processing when applications poll 1739 * completion 1740 */ 1741 pkt_err = pkt_burst_idx - n_pkts; 1742 pkt_burst_idx = 0; 1743 break; 1744 } 1745 1746 pkt_burst_idx = 0; 1747 } 1748 } 1749 1750 if (pkt_burst_idx) { 1751 n_pkts = vq->async_ops.transfer_data(dev->vid, 1752 queue_id, tdes, 0, pkt_burst_idx); 1753 vq->async_pkts_inflight_n += n_pkts; 1754 1755 if (unlikely(n_pkts < pkt_burst_idx)) 1756 pkt_err = pkt_burst_idx - n_pkts; 1757 } 1758 1759 do_data_copy_enqueue(dev, vq); 1760 1761 if (unlikely(pkt_err)) { 1762 uint16_t num_descs = 0; 1763 1764 num_async_pkts -= pkt_err; 1765 /* calculate the sum of descriptors of DMA-error packets. 
*/ 1766 while (pkt_err-- > 0) { 1767 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1768 slot_idx--; 1769 } 1770 vq->async_desc_idx_split -= num_descs; 1771 /* recover shadow used ring and available ring */ 1772 vq->shadow_used_idx -= (vq->last_avail_idx - 1773 async_pkts_log[num_async_pkts].last_avail_idx - 1774 num_descs); 1775 vq->last_avail_idx = 1776 async_pkts_log[num_async_pkts].last_avail_idx; 1777 pkt_idx = async_pkts_log[num_async_pkts].pkt_idx; 1778 num_done_pkts = pkt_idx - num_async_pkts; 1779 } 1780 1781 vq->async_pkts_idx += num_async_pkts; 1782 *comp_count = num_done_pkts; 1783 1784 if (likely(vq->shadow_used_idx)) { 1785 flush_shadow_used_ring_split(dev, vq); 1786 vhost_vring_call_split(dev, vq); 1787 } 1788 1789 return pkt_idx; 1790 } 1791 1792 static __rte_always_inline void 1793 vhost_update_used_packed(struct vhost_virtqueue *vq, 1794 struct vring_used_elem_packed *shadow_ring, 1795 uint16_t count) 1796 { 1797 int i; 1798 uint16_t used_idx = vq->last_used_idx; 1799 uint16_t head_idx = vq->last_used_idx; 1800 uint16_t head_flags = 0; 1801 1802 if (count == 0) 1803 return; 1804 1805 /* Split loop in two to save memory barriers */ 1806 for (i = 0; i < count; i++) { 1807 vq->desc_packed[used_idx].id = shadow_ring[i].id; 1808 vq->desc_packed[used_idx].len = shadow_ring[i].len; 1809 1810 used_idx += shadow_ring[i].count; 1811 if (used_idx >= vq->size) 1812 used_idx -= vq->size; 1813 } 1814 1815 /* The ordering for storing desc flags needs to be enforced. */ 1816 rte_atomic_thread_fence(__ATOMIC_RELEASE); 1817 1818 for (i = 0; i < count; i++) { 1819 uint16_t flags; 1820 1821 if (vq->shadow_used_packed[i].len) 1822 flags = VRING_DESC_F_WRITE; 1823 else 1824 flags = 0; 1825 1826 if (vq->used_wrap_counter) { 1827 flags |= VRING_DESC_F_USED; 1828 flags |= VRING_DESC_F_AVAIL; 1829 } else { 1830 flags &= ~VRING_DESC_F_USED; 1831 flags &= ~VRING_DESC_F_AVAIL; 1832 } 1833 1834 if (i > 0) { 1835 vq->desc_packed[vq->last_used_idx].flags = flags; 1836 } else { 1837 head_idx = vq->last_used_idx; 1838 head_flags = flags; 1839 } 1840 1841 vq_inc_last_used_packed(vq, shadow_ring[i].count); 1842 } 1843 1844 vq->desc_packed[head_idx].flags = head_flags; 1845 } 1846 1847 static __rte_always_inline int 1848 vhost_enqueue_async_single_packed(struct virtio_net *dev, 1849 struct vhost_virtqueue *vq, 1850 struct rte_mbuf *pkt, 1851 struct buf_vector *buf_vec, 1852 uint16_t *nr_descs, 1853 uint16_t *nr_buffers, 1854 struct vring_packed_desc *async_descs, 1855 struct iovec *src_iovec, struct iovec *dst_iovec, 1856 struct rte_vhost_iov_iter *src_it, 1857 struct rte_vhost_iov_iter *dst_it) 1858 { 1859 uint16_t nr_vec = 0; 1860 uint16_t avail_idx = vq->last_avail_idx; 1861 uint16_t max_tries, tries = 0; 1862 uint16_t buf_id = 0; 1863 uint32_t len = 0; 1864 uint16_t desc_count = 0; 1865 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1866 uint32_t buffer_len[vq->size]; 1867 uint16_t buffer_buf_id[vq->size]; 1868 uint16_t buffer_desc_count[vq->size]; 1869 1870 if (rxvq_is_mergeable(dev)) 1871 max_tries = vq->size - 1; 1872 else 1873 max_tries = 1; 1874 1875 while (size > 0) { 1876 /* 1877 * if we tried all available ring items, and still 1878 * can't get enough buf, it means something abnormal 1879 * happened. 
1880 */ 1881 if (unlikely(++tries > max_tries)) 1882 return -1; 1883 1884 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec, 1885 &buf_id, &len, VHOST_ACCESS_RW) < 0)) 1886 return -1; 1887 1888 len = RTE_MIN(len, size); 1889 size -= len; 1890 1891 buffer_len[*nr_buffers] = len; 1892 buffer_buf_id[*nr_buffers] = buf_id; 1893 buffer_desc_count[*nr_buffers] = desc_count; 1894 *nr_buffers += 1; 1895 1896 *nr_descs += desc_count; 1897 avail_idx += desc_count; 1898 if (avail_idx >= vq->size) 1899 avail_idx -= vq->size; 1900 } 1901 1902 if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec, 1903 src_it, dst_it) < 0) 1904 return -1; 1905 /* store descriptors for DMA */ 1906 if (avail_idx >= *nr_descs) { 1907 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1908 *nr_descs * sizeof(struct vring_packed_desc)); 1909 } else { 1910 uint16_t nr_copy = vq->size - vq->last_avail_idx; 1911 1912 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1913 nr_copy * sizeof(struct vring_packed_desc)); 1914 rte_memcpy(async_descs + nr_copy, vq->desc_packed, 1915 (*nr_descs - nr_copy) * sizeof(struct vring_packed_desc)); 1916 } 1917 1918 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1919 1920 return 0; 1921 } 1922 1923 static __rte_always_inline int16_t 1924 virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1925 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers, 1926 struct vring_packed_desc *async_descs, 1927 struct iovec *src_iovec, struct iovec *dst_iovec, 1928 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it) 1929 { 1930 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1931 1932 if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers, 1933 async_descs, src_iovec, dst_iovec, 1934 src_it, dst_it) < 0)) { 1935 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); 1936 return -1; 1937 } 1938 1939 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1940 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1941 1942 return 0; 1943 } 1944 1945 static __rte_always_inline void 1946 dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs, 1947 uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err, 1948 uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts) 1949 { 1950 uint16_t descs_err = 0; 1951 uint16_t buffers_err = 0; 1952 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1953 1954 *num_async_pkts -= nr_err; 1955 *pkt_idx -= nr_err; 1956 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1957 while (nr_err-- > 0) { 1958 descs_err += pkts_info[slot_idx % vq->size].descs; 1959 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1960 slot_idx--; 1961 } 1962 1963 vq->async_buffer_idx_packed -= buffers_err; 1964 1965 if (vq->last_avail_idx >= descs_err) { 1966 vq->last_avail_idx -= descs_err; 1967 1968 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1969 &async_descs[async_descs_idx - descs_err], 1970 descs_err * sizeof(struct vring_packed_desc)); 1971 } else { 1972 uint16_t nr_copy; 1973 1974 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1975 nr_copy = vq->size - vq->last_avail_idx; 1976 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1977 &async_descs[async_descs_idx - descs_err], 1978 nr_copy * sizeof(struct vring_packed_desc)); 1979 descs_err -= nr_copy; 1980 rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err], 1981 descs_err * sizeof(struct vring_packed_desc)); 1982 vq->avail_wrap_counter ^= 1; 1983 } 1984 1985 *num_done_pkts = *pkt_idx - *num_async_pkts; 1986 } 1987 1988 static __rte_noinline uint32_t 1989 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, 1990 struct vhost_virtqueue *vq, uint16_t queue_id, 1991 struct rte_mbuf **pkts, uint32_t count, 1992 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 1993 { 1994 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1995 uint32_t remained = count; 1996 uint16_t async_descs_idx = 0; 1997 uint16_t num_buffers; 1998 uint16_t num_descs; 1999 2000 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 2001 struct iovec *vec_pool = vq->vec_pool; 2002 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 2003 struct iovec *src_iovec = vec_pool; 2004 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 2005 uint16_t slot_idx = 0; 2006 uint16_t segs_await = 0; 2007 uint16_t iovec_idx = 0, it_idx = 0; 2008 struct async_inflight_info *pkts_info = vq->async_pkts_info; 2009 uint32_t n_pkts = 0, pkt_err = 0; 2010 uint32_t num_async_pkts = 0, num_done_pkts = 0; 2011 struct vring_packed_desc async_descs[vq->size]; 2012 2013 do { 2014 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 2015 if (remained >= PACKED_BATCH_SIZE) { 2016 if (!virtio_dev_rx_async_batch_packed(dev, vq, 2017 &pkts[pkt_idx], comp_pkts, &num_done_pkts)) { 2018 pkt_idx += PACKED_BATCH_SIZE; 2019 remained -= PACKED_BATCH_SIZE; 2020 continue; 2021 } 2022 } 2023 2024 num_buffers = 0; 2025 num_descs = 0; 2026 if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx], 2027 &num_descs, &num_buffers, 2028 &async_descs[async_descs_idx], 2029 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 2030 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0)) 2031 break; 2032 2033 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 2034 dev->vid, vq->last_avail_idx, 2035 vq->last_avail_idx + num_descs); 2036 2037 slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size; 2038 if (it_pool[it_idx].count) { 2039 uint16_t from; 2040 2041 async_descs_idx += num_descs; 2042 async_fill_desc(&tdes[pkt_burst_idx++], 2043 &it_pool[it_idx], &it_pool[it_idx + 1]); 2044 pkts_info[slot_idx].descs = num_descs; 2045 pkts_info[slot_idx].nr_buffers = num_buffers; 2046 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2047 num_async_pkts++; 2048 iovec_idx += it_pool[it_idx].nr_segs; 2049 it_idx += 2; 2050 2051 segs_await += it_pool[it_idx].nr_segs; 2052 2053 /** 2054 * recover shadow used ring and keep DMA-occupied 2055 * descriptors. 
2056 */ 2057 from = vq->shadow_used_idx - num_buffers; 2058 store_dma_desc_info_packed(vq->shadow_used_packed, 2059 vq->async_buffers_packed, vq->size, from, 2060 vq->async_buffer_idx_packed, num_buffers); 2061 2062 vq->async_buffer_idx_packed += num_buffers; 2063 if (vq->async_buffer_idx_packed >= vq->size) 2064 vq->async_buffer_idx_packed -= vq->size; 2065 vq->shadow_used_idx -= num_buffers; 2066 } else { 2067 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 2068 } 2069 2070 pkt_idx++; 2071 remained--; 2072 vq_inc_last_avail_packed(vq, num_descs); 2073 2074 /* 2075 * conditions to trigger async device transfer: 2076 * - buffered packet number reaches transfer threshold 2077 * - unused async iov number is less than max vhost vector 2078 */ 2079 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 2080 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) { 2081 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, 2082 tdes, 0, pkt_burst_idx); 2083 iovec_idx = 0; 2084 it_idx = 0; 2085 segs_await = 0; 2086 vq->async_pkts_inflight_n += n_pkts; 2087 2088 if (unlikely(n_pkts < pkt_burst_idx)) { 2089 /* 2090 * log error packets number here and do actual 2091 * error processing when applications poll 2092 * completion 2093 */ 2094 pkt_err = pkt_burst_idx - n_pkts; 2095 pkt_burst_idx = 0; 2096 break; 2097 } 2098 2099 pkt_burst_idx = 0; 2100 } 2101 } while (pkt_idx < count); 2102 2103 if (pkt_burst_idx) { 2104 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 2105 vq->async_pkts_inflight_n += n_pkts; 2106 2107 if (unlikely(n_pkts < pkt_burst_idx)) 2108 pkt_err = pkt_burst_idx - n_pkts; 2109 } 2110 2111 do_data_copy_enqueue(dev, vq); 2112 2113 if (unlikely(pkt_err)) 2114 dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err, 2115 &pkt_idx, &num_async_pkts, &num_done_pkts); 2116 vq->async_pkts_idx += num_async_pkts; 2117 if (vq->async_pkts_idx >= vq->size) 2118 vq->async_pkts_idx -= vq->size; 2119 *comp_count = num_done_pkts; 2120 2121 if (likely(vq->shadow_used_idx)) { 2122 vhost_flush_enqueue_shadow_packed(dev, vq); 2123 vhost_vring_call_packed(dev, vq); 2124 } 2125 2126 return pkt_idx; 2127 } 2128 2129 static __rte_always_inline void 2130 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 2131 { 2132 uint16_t nr_left = n_descs; 2133 uint16_t nr_copy; 2134 uint16_t to, from; 2135 2136 do { 2137 from = vq->last_async_desc_idx_split & (vq->size - 1); 2138 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 2139 to = vq->last_used_idx & (vq->size - 1); 2140 2141 if (to + nr_copy <= vq->size) { 2142 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2143 nr_copy * sizeof(struct vring_used_elem)); 2144 } else { 2145 uint16_t size = vq->size - to; 2146 2147 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2148 size * sizeof(struct vring_used_elem)); 2149 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size], 2150 (nr_copy - size) * sizeof(struct vring_used_elem)); 2151 } 2152 2153 vq->last_async_desc_idx_split += nr_copy; 2154 vq->last_used_idx += nr_copy; 2155 nr_left -= nr_copy; 2156 } while (nr_left > 0); 2157 } 2158 2159 static __rte_always_inline void 2160 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 2161 uint16_t n_buffers) 2162 { 2163 uint16_t nr_left = n_buffers; 2164 uint16_t from, to; 2165 2166 do { 2167 from = vq->last_async_buffer_idx_packed; 2168 to = (from + nr_left) % vq->size; 2169 if (to > from) { 2170 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from); 2171 vq->last_async_buffer_idx_packed += nr_left; 2172 nr_left = 0; 2173 } else { 2174 vhost_update_used_packed(vq, vq->async_buffers_packed + from, 2175 vq->size - from); 2176 vq->last_async_buffer_idx_packed = 0; 2177 nr_left -= vq->size - from; 2178 } 2179 } while (nr_left > 0); 2180 } 2181 2182 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2183 struct rte_mbuf **pkts, uint16_t count) 2184 { 2185 struct virtio_net *dev = get_device(vid); 2186 struct vhost_virtqueue *vq; 2187 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0; 2188 uint16_t start_idx, pkts_idx, vq_size; 2189 struct async_inflight_info *pkts_info; 2190 uint16_t from, i; 2191 2192 if (!dev) 2193 return 0; 2194 2195 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2196 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2197 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2198 dev->vid, __func__, queue_id); 2199 return 0; 2200 } 2201 2202 vq = dev->virtqueue[queue_id]; 2203 2204 if (unlikely(!vq->async_registered)) { 2205 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2206 dev->vid, __func__, queue_id); 2207 return 0; 2208 } 2209 2210 rte_spinlock_lock(&vq->access_lock); 2211 2212 pkts_idx = vq->async_pkts_idx % vq->size; 2213 pkts_info = vq->async_pkts_info; 2214 vq_size = vq->size; 2215 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx, 2216 vq_size, vq->async_pkts_inflight_n); 2217 2218 if (count > vq->async_last_pkts_n) 2219 n_pkts_cpl = vq->async_ops.check_completed_copies(vid, 2220 queue_id, 0, count - vq->async_last_pkts_n); 2221 n_pkts_cpl += vq->async_last_pkts_n; 2222 2223 n_pkts_put = RTE_MIN(count, n_pkts_cpl); 2224 if (unlikely(n_pkts_put == 0)) { 2225 vq->async_last_pkts_n = n_pkts_cpl; 2226 goto done; 2227 } 2228 2229 if (vq_is_packed(dev)) { 2230 for (i = 0; i < n_pkts_put; i++) { 2231 from = (start_idx + i) % vq_size; 2232 n_buffers += pkts_info[from].nr_buffers; 2233 pkts[i] = pkts_info[from].mbuf; 2234 } 2235 } else { 2236 for (i = 0; i < n_pkts_put; i++) { 2237 from = (start_idx + i) & (vq_size - 1); 2238 n_descs += pkts_info[from].descs; 2239 pkts[i] = pkts_info[from].mbuf; 2240 } 2241 } 2242 2243 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put; 2244 vq->async_pkts_inflight_n -= n_pkts_put; 2245 2246 if (likely(vq->enabled && vq->access_ok)) { 2247 if (vq_is_packed(dev)) { 2248 write_back_completed_descs_packed(vq, n_buffers); 2249 2250 
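/*
 * The completed buffers are now visible in the used ring; notify the
 * guest, subject to its event suppression settings.
 */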
vhost_vring_call_packed(dev, vq); 2251 } else { 2252 write_back_completed_descs_split(vq, n_descs); 2253 2254 __atomic_add_fetch(&vq->used->idx, n_descs, 2255 __ATOMIC_RELEASE); 2256 vhost_vring_call_split(dev, vq); 2257 } 2258 } else { 2259 if (vq_is_packed(dev)) { 2260 vq->last_async_buffer_idx_packed += n_buffers; 2261 if (vq->last_async_buffer_idx_packed >= vq->size) 2262 vq->last_async_buffer_idx_packed -= vq->size; 2263 } else { 2264 vq->last_async_desc_idx_split += n_descs; 2265 } 2266 } 2267 2268 done: 2269 rte_spinlock_unlock(&vq->access_lock); 2270 2271 return n_pkts_put; 2272 } 2273 2274 static __rte_always_inline uint32_t 2275 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2276 struct rte_mbuf **pkts, uint32_t count, 2277 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2278 { 2279 struct vhost_virtqueue *vq; 2280 uint32_t nb_tx = 0; 2281 2282 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2283 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2284 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2285 dev->vid, __func__, queue_id); 2286 return 0; 2287 } 2288 2289 vq = dev->virtqueue[queue_id]; 2290 2291 rte_spinlock_lock(&vq->access_lock); 2292 2293 if (unlikely(!vq->enabled || !vq->async_registered)) 2294 goto out_access_unlock; 2295 2296 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2297 vhost_user_iotlb_rd_lock(vq); 2298 2299 if (unlikely(!vq->access_ok)) 2300 if (unlikely(vring_translate(dev, vq) < 0)) 2301 goto out; 2302 2303 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2304 if (count == 0) 2305 goto out; 2306 2307 if (vq_is_packed(dev)) 2308 nb_tx = virtio_dev_rx_async_submit_packed(dev, 2309 vq, queue_id, pkts, count, comp_pkts, 2310 comp_count); 2311 else 2312 nb_tx = virtio_dev_rx_async_submit_split(dev, 2313 vq, queue_id, pkts, count, comp_pkts, 2314 comp_count); 2315 2316 out: 2317 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2318 vhost_user_iotlb_rd_unlock(vq); 2319 2320 out_access_unlock: 2321 rte_spinlock_unlock(&vq->access_lock); 2322 2323 return nb_tx; 2324 } 2325 2326 uint16_t 2327 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2328 struct rte_mbuf **pkts, uint16_t count, 2329 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2330 { 2331 struct virtio_net *dev = get_device(vid); 2332 2333 *comp_count = 0; 2334 if (!dev) 2335 return 0; 2336 2337 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2338 VHOST_LOG_DATA(ERR, 2339 "(%d) %s: built-in vhost net backend is disabled.\n", 2340 dev->vid, __func__); 2341 return 0; 2342 } 2343 2344 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts, 2345 comp_count); 2346 } 2347 2348 static inline bool 2349 virtio_net_with_host_offload(struct virtio_net *dev) 2350 { 2351 if (dev->features & 2352 ((1ULL << VIRTIO_NET_F_CSUM) | 2353 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2354 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2355 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2356 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2357 return true; 2358 2359 return false; 2360 } 2361 2362 static int 2363 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2364 { 2365 struct rte_ipv4_hdr *ipv4_hdr; 2366 struct rte_ipv6_hdr *ipv6_hdr; 2367 struct rte_ether_hdr *eth_hdr; 2368 uint16_t ethertype; 2369 uint16_t data_len = rte_pktmbuf_data_len(m); 2370 2371 if (data_len < sizeof(struct rte_ether_hdr)) 2372 return -EINVAL; 2373 2374 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2375 2376 m->l2_len = sizeof(struct rte_ether_hdr); 2377 
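/* ether_type is big-endian on the wire; convert it before comparing against host-order constants. */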
ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2378 2379 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2380 if (data_len < sizeof(struct rte_ether_hdr) + 2381 sizeof(struct rte_vlan_hdr)) 2382 goto error; 2383 2384 struct rte_vlan_hdr *vlan_hdr = 2385 (struct rte_vlan_hdr *)(eth_hdr + 1); 2386 2387 m->l2_len += sizeof(struct rte_vlan_hdr); 2388 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2389 } 2390 2391 switch (ethertype) { 2392 case RTE_ETHER_TYPE_IPV4: 2393 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2394 goto error; 2395 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2396 m->l2_len); 2397 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2398 if (data_len < m->l2_len + m->l3_len) 2399 goto error; 2400 m->ol_flags |= PKT_TX_IPV4; 2401 *l4_proto = ipv4_hdr->next_proto_id; 2402 break; 2403 case RTE_ETHER_TYPE_IPV6: 2404 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2405 goto error; 2406 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2407 m->l2_len); 2408 m->l3_len = sizeof(struct rte_ipv6_hdr); 2409 m->ol_flags |= PKT_TX_IPV6; 2410 *l4_proto = ipv6_hdr->proto; 2411 break; 2412 default: 2413 /* a valid L3 header is needed for further L4 parsing */ 2414 goto error; 2415 } 2416 2417 /* both CSUM and GSO need a valid L4 header */ 2418 switch (*l4_proto) { 2419 case IPPROTO_TCP: 2420 if (data_len < m->l2_len + m->l3_len + 2421 sizeof(struct rte_tcp_hdr)) 2422 goto error; 2423 break; 2424 case IPPROTO_UDP: 2425 if (data_len < m->l2_len + m->l3_len + 2426 sizeof(struct rte_udp_hdr)) 2427 goto error; 2428 break; 2429 case IPPROTO_SCTP: 2430 if (data_len < m->l2_len + m->l3_len + 2431 sizeof(struct rte_sctp_hdr)) 2432 goto error; 2433 break; 2434 default: 2435 goto error; 2436 } 2437 2438 return 0; 2439 2440 error: 2441 m->l2_len = 0; 2442 m->l3_len = 0; 2443 m->ol_flags = 0; 2444 return -EINVAL; 2445 } 2446 2447 static __rte_always_inline void 2448 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m) 2449 { 2450 uint8_t l4_proto = 0; 2451 struct rte_tcp_hdr *tcp_hdr = NULL; 2452 uint16_t tcp_len; 2453 uint16_t data_len = rte_pktmbuf_data_len(m); 2454 2455 if (parse_headers(m, &l4_proto) < 0) 2456 return; 2457 2458 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2459 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2460 switch (hdr->csum_offset) { 2461 case (offsetof(struct rte_tcp_hdr, cksum)): 2462 if (l4_proto != IPPROTO_TCP) 2463 goto error; 2464 m->ol_flags |= PKT_TX_TCP_CKSUM; 2465 break; 2466 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2467 if (l4_proto != IPPROTO_UDP) 2468 goto error; 2469 m->ol_flags |= PKT_TX_UDP_CKSUM; 2470 break; 2471 case (offsetof(struct rte_sctp_hdr, cksum)): 2472 if (l4_proto != IPPROTO_SCTP) 2473 goto error; 2474 m->ol_flags |= PKT_TX_SCTP_CKSUM; 2475 break; 2476 default: 2477 goto error; 2478 } 2479 } else { 2480 goto error; 2481 } 2482 } 2483 2484 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2485 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2486 case VIRTIO_NET_HDR_GSO_TCPV4: 2487 case VIRTIO_NET_HDR_GSO_TCPV6: 2488 if (l4_proto != IPPROTO_TCP) 2489 goto error; 2490 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2491 struct rte_tcp_hdr *, 2492 m->l2_len + m->l3_len); 2493 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2494 if (data_len < m->l2_len + m->l3_len + tcp_len) 2495 goto error; 2496 m->ol_flags |= PKT_TX_TCP_SEG; 2497 m->tso_segsz = hdr->gso_size; 2498 m->l4_len = tcp_len; 2499 break; 2500 case VIRTIO_NET_HDR_GSO_UDP: 2501 if (l4_proto != IPPROTO_UDP) 2502 goto error; 2503 
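/* UFO: request UDP segmentation using the guest-provided gso_size */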
m->ol_flags |= PKT_TX_UDP_SEG; 2504 m->tso_segsz = hdr->gso_size; 2505 m->l4_len = sizeof(struct rte_udp_hdr); 2506 break; 2507 default: 2508 VHOST_LOG_DATA(WARNING, 2509 "unsupported gso type %u.\n", hdr->gso_type); 2510 goto error; 2511 } 2512 } 2513 return; 2514 2515 error: 2516 m->l2_len = 0; 2517 m->l3_len = 0; 2518 m->ol_flags = 0; 2519 } 2520 2521 static __rte_always_inline void 2522 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m, 2523 bool legacy_ol_flags) 2524 { 2525 struct rte_net_hdr_lens hdr_lens; 2526 int l4_supported = 0; 2527 uint32_t ptype; 2528 2529 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2530 return; 2531 2532 if (legacy_ol_flags) { 2533 vhost_dequeue_offload_legacy(hdr, m); 2534 return; 2535 } 2536 2537 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 2538 2539 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2540 m->packet_type = ptype; 2541 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2542 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2543 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2544 l4_supported = 1; 2545 2546 /* According to Virtio 1.1 spec, the device only needs to look at 2547 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2548 * This differs from the processing incoming packets path where the 2549 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2550 * device. 2551 * 2552 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2553 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2554 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2555 * 2556 * 5.1.6.2.2 Device Requirements: Packet Transmission 2557 * The device MUST ignore flag bits that it does not recognize. 2558 */ 2559 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2560 uint32_t hdrlen; 2561 2562 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2563 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2564 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 2565 } else { 2566 /* Unknown proto or tunnel, do sw cksum. We can assume 2567 * the cksum field is in the first segment since the 2568 * buffers we provided to the host are large enough. 2569 * In case of SCTP, this will be wrong since it's a CRC 2570 * but there's nothing we can do. 
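 * The raw checksum is computed from csum_start to the end of the packet
 * and its one's complement is written at csum_start + csum_offset, which
 * is what the guest asked the device to do via the virtio-net header.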
2571 */ 2572 uint16_t csum = 0, off; 2573 2574 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2575 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2576 return; 2577 if (likely(csum != 0xffff)) 2578 csum = ~csum; 2579 off = hdr->csum_offset + hdr->csum_start; 2580 if (rte_pktmbuf_data_len(m) >= off + 1) 2581 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2582 } 2583 } 2584 2585 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2586 if (hdr->gso_size == 0) 2587 return; 2588 2589 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2590 case VIRTIO_NET_HDR_GSO_TCPV4: 2591 case VIRTIO_NET_HDR_GSO_TCPV6: 2592 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2593 break; 2594 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2595 m->tso_segsz = hdr->gso_size; 2596 break; 2597 case VIRTIO_NET_HDR_GSO_UDP: 2598 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2599 break; 2600 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2601 m->tso_segsz = hdr->gso_size; 2602 break; 2603 default: 2604 break; 2605 } 2606 } 2607 } 2608 2609 static __rte_noinline void 2610 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2611 struct buf_vector *buf_vec) 2612 { 2613 uint64_t len; 2614 uint64_t remain = sizeof(struct virtio_net_hdr); 2615 uint64_t src; 2616 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2617 2618 while (remain) { 2619 len = RTE_MIN(remain, buf_vec->buf_len); 2620 src = buf_vec->buf_addr; 2621 rte_memcpy((void *)(uintptr_t)dst, 2622 (void *)(uintptr_t)src, len); 2623 2624 remain -= len; 2625 dst += len; 2626 buf_vec++; 2627 } 2628 } 2629 2630 static __rte_always_inline int 2631 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2632 struct buf_vector *buf_vec, uint16_t nr_vec, 2633 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2634 bool legacy_ol_flags) 2635 { 2636 uint32_t buf_avail, buf_offset; 2637 uint64_t buf_addr, buf_len; 2638 uint32_t mbuf_avail, mbuf_offset; 2639 uint32_t cpy_len; 2640 struct rte_mbuf *cur = m, *prev = m; 2641 struct virtio_net_hdr tmp_hdr; 2642 struct virtio_net_hdr *hdr = NULL; 2643 /* A counter to avoid desc dead loop chain */ 2644 uint16_t vec_idx = 0; 2645 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 2646 int error = 0; 2647 2648 buf_addr = buf_vec[vec_idx].buf_addr; 2649 buf_len = buf_vec[vec_idx].buf_len; 2650 2651 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 2652 error = -1; 2653 goto out; 2654 } 2655 2656 if (virtio_net_with_host_offload(dev)) { 2657 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2658 /* 2659 * No luck, the virtio-net header doesn't fit 2660 * in a contiguous virtual area. 2661 */ 2662 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2663 hdr = &tmp_hdr; 2664 } else { 2665 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); 2666 } 2667 } 2668 2669 /* 2670 * A virtio driver normally uses at least 2 desc buffers 2671 * for Tx: the first for storing the header, and others 2672 * for storing the data. 
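 * The header may instead share its descriptor with data, or even span
 * more than one descriptor; the buf_offset handling below skips the
 * vhost_hlen header bytes before any payload is copied.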
2673 */
2674 if (unlikely(buf_len < dev->vhost_hlen)) {
2675 buf_offset = dev->vhost_hlen - buf_len;
2676 vec_idx++;
2677 buf_addr = buf_vec[vec_idx].buf_addr;
2678 buf_len = buf_vec[vec_idx].buf_len;
2679 buf_avail = buf_len - buf_offset;
2680 } else if (buf_len == dev->vhost_hlen) {
2681 if (unlikely(++vec_idx >= nr_vec))
2682 goto out;
2683 buf_addr = buf_vec[vec_idx].buf_addr;
2684 buf_len = buf_vec[vec_idx].buf_len;
2685
2686 buf_offset = 0;
2687 buf_avail = buf_len;
2688 } else {
2689 buf_offset = dev->vhost_hlen;
2690 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
2691 }
2692
2693 PRINT_PACKET(dev,
2694 (uintptr_t)(buf_addr + buf_offset),
2695 (uint32_t)buf_avail, 0);
2696
2697 mbuf_offset = 0;
2698 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
2699 while (1) {
2700 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
2701
2702 if (likely(cpy_len > MAX_BATCH_LEN ||
2703 vq->batch_copy_nb_elems >= vq->size ||
2704 (hdr && cur == m))) {
2705 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
2706 mbuf_offset),
2707 (void *)((uintptr_t)(buf_addr +
2708 buf_offset)), cpy_len);
2709 } else {
2710 batch_copy[vq->batch_copy_nb_elems].dst =
2711 rte_pktmbuf_mtod_offset(cur, void *,
2712 mbuf_offset);
2713 batch_copy[vq->batch_copy_nb_elems].src =
2714 (void *)((uintptr_t)(buf_addr + buf_offset));
2715 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
2716 vq->batch_copy_nb_elems++;
2717 }
2718
2719 mbuf_avail -= cpy_len;
2720 mbuf_offset += cpy_len;
2721 buf_avail -= cpy_len;
2722 buf_offset += cpy_len;
2723
2724 /* This buf has reached its end, get the next one */
2725 if (buf_avail == 0) {
2726 if (++vec_idx >= nr_vec)
2727 break;
2728
2729 buf_addr = buf_vec[vec_idx].buf_addr;
2730 buf_len = buf_vec[vec_idx].buf_len;
2731
2732 buf_offset = 0;
2733 buf_avail = buf_len;
2734
2735 PRINT_PACKET(dev, (uintptr_t)buf_addr,
2736 (uint32_t)buf_avail, 0);
2737 }
2738
2739 /*
2740 * This mbuf has reached its end, get a new one
2741 * to hold more data.
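 * Each new segment is linked after the previous one, and the head
 * mbuf's nb_segs and pkt_len are updated as segments are filled.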
2742 */ 2743 if (mbuf_avail == 0) { 2744 cur = rte_pktmbuf_alloc(mbuf_pool); 2745 if (unlikely(cur == NULL)) { 2746 VHOST_LOG_DATA(ERR, "Failed to " 2747 "allocate memory for mbuf.\n"); 2748 error = -1; 2749 goto out; 2750 } 2751 2752 prev->next = cur; 2753 prev->data_len = mbuf_offset; 2754 m->nb_segs += 1; 2755 m->pkt_len += mbuf_offset; 2756 prev = cur; 2757 2758 mbuf_offset = 0; 2759 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2760 } 2761 } 2762 2763 prev->data_len = mbuf_offset; 2764 m->pkt_len += mbuf_offset; 2765 2766 if (hdr) 2767 vhost_dequeue_offload(hdr, m, legacy_ol_flags); 2768 2769 out: 2770 2771 return error; 2772 } 2773 2774 static void 2775 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2776 { 2777 rte_free(opaque); 2778 } 2779 2780 static int 2781 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) 2782 { 2783 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2784 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2785 uint16_t buf_len; 2786 rte_iova_t iova; 2787 void *buf; 2788 2789 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2790 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2791 2792 if (unlikely(total_len > UINT16_MAX)) 2793 return -ENOSPC; 2794 2795 buf_len = total_len; 2796 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2797 if (unlikely(buf == NULL)) 2798 return -ENOMEM; 2799 2800 /* Initialize shinfo */ 2801 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2802 virtio_dev_extbuf_free, buf); 2803 if (unlikely(shinfo == NULL)) { 2804 rte_free(buf); 2805 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); 2806 return -1; 2807 } 2808 2809 iova = rte_malloc_virt2iova(buf); 2810 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2811 rte_pktmbuf_reset_headroom(pkt); 2812 2813 return 0; 2814 } 2815 2816 /* 2817 * Prepare a host supported pktmbuf. 2818 */ 2819 static __rte_always_inline int 2820 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2821 uint32_t data_len) 2822 { 2823 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2824 return 0; 2825 2826 /* attach an external buffer if supported */ 2827 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len)) 2828 return 0; 2829 2830 /* check if chained buffers are allowed */ 2831 if (!dev->linearbuf) 2832 return 0; 2833 2834 return -1; 2835 } 2836 2837 __rte_always_inline 2838 static uint16_t 2839 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2840 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2841 bool legacy_ol_flags) 2842 { 2843 uint16_t i; 2844 uint16_t free_entries; 2845 uint16_t dropped = 0; 2846 static bool allocerr_warned; 2847 2848 /* 2849 * The ordering between avail index and 2850 * desc reads needs to be enforced. 
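 * The acquire load of avail->idx below pairs with the guest's release
 * store of the index, so any descriptors published before that store
 * are visible here.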
2851 */ 2852 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2853 vq->last_avail_idx; 2854 if (free_entries == 0) 2855 return 0; 2856 2857 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2858 2859 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2860 2861 count = RTE_MIN(count, MAX_PKT_BURST); 2862 count = RTE_MIN(count, free_entries); 2863 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", 2864 dev->vid, count); 2865 2866 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2867 return 0; 2868 2869 for (i = 0; i < count; i++) { 2870 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2871 uint16_t head_idx; 2872 uint32_t buf_len; 2873 uint16_t nr_vec = 0; 2874 int err; 2875 2876 if (unlikely(fill_vec_buf_split(dev, vq, 2877 vq->last_avail_idx + i, 2878 &nr_vec, buf_vec, 2879 &head_idx, &buf_len, 2880 VHOST_ACCESS_RO) < 0)) 2881 break; 2882 2883 update_shadow_used_ring_split(vq, head_idx, 0); 2884 2885 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2886 if (unlikely(err)) { 2887 /* 2888 * mbuf allocation fails for jumbo packets when external 2889 * buffer allocation is not allowed and linear buffer 2890 * is required. Drop this packet. 2891 */ 2892 if (!allocerr_warned) { 2893 VHOST_LOG_DATA(ERR, 2894 "Failed mbuf alloc of size %d from %s on %s.\n", 2895 buf_len, mbuf_pool->name, dev->ifname); 2896 allocerr_warned = true; 2897 } 2898 dropped += 1; 2899 i++; 2900 break; 2901 } 2902 2903 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2904 mbuf_pool, legacy_ol_flags); 2905 if (unlikely(err)) { 2906 if (!allocerr_warned) { 2907 VHOST_LOG_DATA(ERR, 2908 "Failed to copy desc to mbuf on %s.\n", 2909 dev->ifname); 2910 allocerr_warned = true; 2911 } 2912 dropped += 1; 2913 i++; 2914 break; 2915 } 2916 } 2917 2918 if (dropped) 2919 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2920 2921 vq->last_avail_idx += i; 2922 2923 do_data_copy_dequeue(vq); 2924 if (unlikely(i < count)) 2925 vq->shadow_used_idx = i; 2926 if (likely(vq->shadow_used_idx)) { 2927 flush_shadow_used_ring_split(dev, vq); 2928 vhost_vring_call_split(dev, vq); 2929 } 2930 2931 return (i - dropped); 2932 } 2933 2934 __rte_noinline 2935 static uint16_t 2936 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2937 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2938 struct rte_mbuf **pkts, uint16_t count) 2939 { 2940 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2941 } 2942 2943 __rte_noinline 2944 static uint16_t 2945 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2946 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2947 struct rte_mbuf **pkts, uint16_t count) 2948 { 2949 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2950 } 2951 2952 static __rte_always_inline int 2953 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2954 struct vhost_virtqueue *vq, 2955 struct rte_mbuf **pkts, 2956 uint16_t avail_idx, 2957 uintptr_t *desc_addrs, 2958 uint16_t *ids) 2959 { 2960 bool wrap = vq->avail_wrap_counter; 2961 struct vring_packed_desc *descs = vq->desc_packed; 2962 uint64_t lens[PACKED_BATCH_SIZE]; 2963 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2964 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2965 uint16_t flags, i; 2966 2967 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2968 return -1; 2969 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2970 return -1; 2971 2972 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2973 flags = descs[avail_idx + i].flags; 2974 if 
(unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2975 (wrap == !!(flags & VRING_DESC_F_USED)) || 2976 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2977 return -1; 2978 } 2979 2980 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 2981 2982 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2983 lens[i] = descs[avail_idx + i].len; 2984 2985 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2986 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 2987 descs[avail_idx + i].addr, 2988 &lens[i], VHOST_ACCESS_RW); 2989 } 2990 2991 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2992 if (unlikely(!desc_addrs[i])) 2993 return -1; 2994 if (unlikely((lens[i] != descs[avail_idx + i].len))) 2995 return -1; 2996 } 2997 2998 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2999 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3000 goto err; 3001 } 3002 3003 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3004 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3005 3006 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3007 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3008 goto err; 3009 } 3010 3011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3012 pkts[i]->pkt_len = lens[i] - buf_offset; 3013 pkts[i]->data_len = pkts[i]->pkt_len; 3014 ids[i] = descs[avail_idx + i].id; 3015 } 3016 3017 return 0; 3018 3019 err: 3020 return -1; 3021 } 3022 3023 static __rte_always_inline int 3024 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3025 struct vhost_virtqueue *vq, 3026 struct rte_mbuf **pkts, 3027 bool legacy_ol_flags) 3028 { 3029 uint16_t avail_idx = vq->last_avail_idx; 3030 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3031 struct virtio_net_hdr *hdr; 3032 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3033 uint16_t ids[PACKED_BATCH_SIZE]; 3034 uint16_t i; 3035 3036 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3037 desc_addrs, ids)) 3038 return -1; 3039 3040 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3041 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3042 3043 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3044 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3045 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3046 pkts[i]->pkt_len); 3047 3048 if (virtio_net_with_host_offload(dev)) { 3049 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3050 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3051 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags); 3052 } 3053 } 3054 3055 if (virtio_net_is_inorder(dev)) 3056 vhost_shadow_dequeue_batch_packed_inorder(vq, 3057 ids[PACKED_BATCH_SIZE - 1]); 3058 else 3059 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3060 3061 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3062 3063 return 0; 3064 } 3065 3066 static __rte_always_inline int 3067 vhost_dequeue_single_packed(struct virtio_net *dev, 3068 struct vhost_virtqueue *vq, 3069 struct rte_mempool *mbuf_pool, 3070 struct rte_mbuf *pkts, 3071 uint16_t *buf_id, 3072 uint16_t *desc_count, 3073 bool legacy_ol_flags) 3074 { 3075 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3076 uint32_t buf_len; 3077 uint16_t nr_vec = 0; 3078 int err; 3079 static bool allocerr_warned; 3080 3081 if (unlikely(fill_vec_buf_packed(dev, vq, 3082 vq->last_avail_idx, desc_count, 3083 buf_vec, &nr_vec, 3084 buf_id, &buf_len, 3085 VHOST_ACCESS_RO) < 0)) 3086 return -1; 3087 3088 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3089 if (!allocerr_warned) { 3090 VHOST_LOG_DATA(ERR, 3091 "Failed mbuf alloc of size %d from %s on %s.\n", 3092 buf_len, 
mbuf_pool->name, dev->ifname); 3093 allocerr_warned = true; 3094 } 3095 return -1; 3096 } 3097 3098 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3099 mbuf_pool, legacy_ol_flags); 3100 if (unlikely(err)) { 3101 if (!allocerr_warned) { 3102 VHOST_LOG_DATA(ERR, 3103 "Failed to copy desc to mbuf on %s.\n", 3104 dev->ifname); 3105 allocerr_warned = true; 3106 } 3107 return -1; 3108 } 3109 3110 return 0; 3111 } 3112 3113 static __rte_always_inline int 3114 virtio_dev_tx_single_packed(struct virtio_net *dev, 3115 struct vhost_virtqueue *vq, 3116 struct rte_mempool *mbuf_pool, 3117 struct rte_mbuf *pkts, 3118 bool legacy_ol_flags) 3119 { 3120 3121 uint16_t buf_id, desc_count = 0; 3122 int ret; 3123 3124 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3125 &desc_count, legacy_ol_flags); 3126 3127 if (likely(desc_count > 0)) { 3128 if (virtio_net_is_inorder(dev)) 3129 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3130 desc_count); 3131 else 3132 vhost_shadow_dequeue_single_packed(vq, buf_id, 3133 desc_count); 3134 3135 vq_inc_last_avail_packed(vq, desc_count); 3136 } 3137 3138 return ret; 3139 } 3140 3141 __rte_always_inline 3142 static uint16_t 3143 virtio_dev_tx_packed(struct virtio_net *dev, 3144 struct vhost_virtqueue *__rte_restrict vq, 3145 struct rte_mempool *mbuf_pool, 3146 struct rte_mbuf **__rte_restrict pkts, 3147 uint32_t count, 3148 bool legacy_ol_flags) 3149 { 3150 uint32_t pkt_idx = 0; 3151 3152 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3153 return 0; 3154 3155 do { 3156 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3157 3158 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3159 if (!virtio_dev_tx_batch_packed(dev, vq, 3160 &pkts[pkt_idx], 3161 legacy_ol_flags)) { 3162 pkt_idx += PACKED_BATCH_SIZE; 3163 continue; 3164 } 3165 } 3166 3167 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3168 pkts[pkt_idx], 3169 legacy_ol_flags)) 3170 break; 3171 pkt_idx++; 3172 } while (pkt_idx < count); 3173 3174 if (pkt_idx != count) 3175 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3176 3177 if (vq->shadow_used_idx) { 3178 do_data_copy_dequeue(vq); 3179 3180 vhost_flush_dequeue_shadow_packed(dev, vq); 3181 vhost_vring_call_packed(dev, vq); 3182 } 3183 3184 return pkt_idx; 3185 } 3186 3187 __rte_noinline 3188 static uint16_t 3189 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3190 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3191 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3192 { 3193 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3194 } 3195 3196 __rte_noinline 3197 static uint16_t 3198 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3199 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3200 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3201 { 3202 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3203 } 3204 3205 uint16_t 3206 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3207 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3208 { 3209 struct virtio_net *dev; 3210 struct rte_mbuf *rarp_mbuf = NULL; 3211 struct vhost_virtqueue *vq; 3212 int16_t success = 1; 3213 3214 dev = get_device(vid); 3215 if (!dev) 3216 return 0; 3217 3218 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3219 VHOST_LOG_DATA(ERR, 3220 "(%d) %s: built-in vhost net backend is disabled.\n", 3221 dev->vid, __func__); 3222 return 0; 3223 } 3224 3225 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, 
dev->nr_vring))) {
3226 VHOST_LOG_DATA(ERR,
3227 "(%d) %s: invalid virtqueue idx %d.\n",
3228 dev->vid, __func__, queue_id);
3229 return 0;
3230 }
3231
3232 vq = dev->virtqueue[queue_id];
3233
3234 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3235 return 0;
3236
3237 if (unlikely(!vq->enabled)) {
3238 count = 0;
3239 goto out_access_unlock;
3240 }
3241
3242 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3243 vhost_user_iotlb_rd_lock(vq);
3244
3245 if (unlikely(!vq->access_ok))
3246 if (unlikely(vring_translate(dev, vq) < 0)) {
3247 count = 0;
3248 goto out;
3249 }
3250
3251 /*
3252 * Construct a RARP broadcast packet and inject it into the "pkts"
3253 * array, so it looks as if the guest itself sent such a packet.
3254 *
3255 * Check user_send_rarp() for more information.
3256 *
3257 * broadcast_rarp shares a cacheline in the virtio_net structure
3258 * with some fields that are accessed during enqueue, and
3259 * __atomic_compare_exchange_n causes a write when it performs the
3260 * compare and exchange. This could result in false sharing between
3261 * enqueue and dequeue.
3262 *
3263 * Prevent unnecessary false sharing by reading broadcast_rarp first
3264 * and only performing the compare and exchange if the read indicates
3265 * it is likely to be set.
3266 */
3267 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3268 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3269 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3270
3271 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3272 if (rarp_mbuf == NULL) {
3273 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
3274 count = 0;
3275 goto out;
3276 }
3277 count -= 1;
3278 }
3279
3280 if (vq_is_packed(dev)) {
3281 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3282 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3283 else
3284 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3285 } else {
3286 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3287 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3288 else
3289 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3290 }
3291
3292 out:
3293 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3294 vhost_user_iotlb_rd_unlock(vq);
3295
3296 out_access_unlock:
3297 rte_spinlock_unlock(&vq->access_lock);
3298
3299 if (unlikely(rarp_mbuf != NULL)) {
3300 /*
3301 * Inject it at the head of the "pkts" array, so that the
3302 * switch's MAC learning table gets updated first.
3303 */
3304 memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
3305 pkts[0] = rarp_mbuf;
3306 count += 1;
3307 }
3308
3309 return count;
3310 }
3311