1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_vhost.h> 15 #include <rte_tcp.h> 16 #include <rte_udp.h> 17 #include <rte_sctp.h> 18 #include <rte_arp.h> 19 #include <rte_spinlock.h> 20 #include <rte_malloc.h> 21 #include <rte_vhost_async.h> 22 23 #include "iotlb.h" 24 #include "vhost.h" 25 26 #define MAX_BATCH_LEN 256 27 28 #define VHOST_ASYNC_BATCH_THRESHOLD 32 29 30 static __rte_always_inline bool 31 rxvq_is_mergeable(struct virtio_net *dev) 32 { 33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 34 } 35 36 static __rte_always_inline bool 37 virtio_net_is_inorder(struct virtio_net *dev) 38 { 39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 40 } 41 42 static bool 43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 44 { 45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 46 } 47 48 static inline void 49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 50 { 51 struct batch_copy_elem *elem = vq->batch_copy_elems; 52 uint16_t count = vq->batch_copy_nb_elems; 53 int i; 54 55 for (i = 0; i < count; i++) { 56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 58 elem[i].len); 59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 60 } 61 62 vq->batch_copy_nb_elems = 0; 63 } 64 65 static inline void 66 do_data_copy_dequeue(struct vhost_virtqueue *vq) 67 { 68 struct batch_copy_elem *elem = vq->batch_copy_elems; 69 uint16_t count = vq->batch_copy_nb_elems; 70 int i; 71 72 for (i = 0; i < count; i++) 73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 74 75 vq->batch_copy_nb_elems = 0; 76 } 77 78 static __rte_always_inline void 79 do_flush_shadow_used_ring_split(struct virtio_net *dev, 80 struct vhost_virtqueue *vq, 81 uint16_t to, uint16_t from, uint16_t size) 82 { 83 rte_memcpy(&vq->used->ring[to], 84 &vq->shadow_used_split[from], 85 size * sizeof(struct vring_used_elem)); 86 vhost_log_cache_used_vring(dev, vq, 87 offsetof(struct vring_used, ring[to]), 88 size * sizeof(struct vring_used_elem)); 89 } 90 91 static __rte_always_inline void 92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 93 { 94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 95 96 if (used_idx + vq->shadow_used_idx <= vq->size) { 97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 98 vq->shadow_used_idx); 99 } else { 100 uint16_t size; 101 102 /* update used ring interval [used_idx, vq->size] */ 103 size = vq->size - used_idx; 104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 105 106 /* update the left half used ring interval [0, left_size] */ 107 do_flush_shadow_used_ring_split(dev, vq, 0, size, 108 vq->shadow_used_idx - size); 109 } 110 vq->last_used_idx += vq->shadow_used_idx; 111 112 vhost_log_cache_sync(dev, vq); 113 114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 115 __ATOMIC_RELEASE); 116 vq->shadow_used_idx = 0; 117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 118 sizeof(vq->used->idx)); 119 } 120 121 static __rte_always_inline void 122 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 123 uint16_t desc_idx, uint32_t len) 124 { 125 uint16_t i = vq->shadow_used_idx++; 126 127 vq->shadow_used_split[i].id = desc_idx; 128 
vq->shadow_used_split[i].len = len; 129 } 130 131 static __rte_always_inline void 132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 133 struct vhost_virtqueue *vq) 134 { 135 int i; 136 uint16_t used_idx = vq->last_used_idx; 137 uint16_t head_idx = vq->last_used_idx; 138 uint16_t head_flags = 0; 139 140 /* Split loop in two to save memory barriers */ 141 for (i = 0; i < vq->shadow_used_idx; i++) { 142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 144 145 used_idx += vq->shadow_used_packed[i].count; 146 if (used_idx >= vq->size) 147 used_idx -= vq->size; 148 } 149 150 /* The ordering for storing desc flags needs to be enforced. */ 151 rte_atomic_thread_fence(__ATOMIC_RELEASE); 152 153 for (i = 0; i < vq->shadow_used_idx; i++) { 154 uint16_t flags; 155 156 if (vq->shadow_used_packed[i].len) 157 flags = VRING_DESC_F_WRITE; 158 else 159 flags = 0; 160 161 if (vq->used_wrap_counter) { 162 flags |= VRING_DESC_F_USED; 163 flags |= VRING_DESC_F_AVAIL; 164 } else { 165 flags &= ~VRING_DESC_F_USED; 166 flags &= ~VRING_DESC_F_AVAIL; 167 } 168 169 if (i > 0) { 170 vq->desc_packed[vq->last_used_idx].flags = flags; 171 172 vhost_log_cache_used_vring(dev, vq, 173 vq->last_used_idx * 174 sizeof(struct vring_packed_desc), 175 sizeof(struct vring_packed_desc)); 176 } else { 177 head_idx = vq->last_used_idx; 178 head_flags = flags; 179 } 180 181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 182 } 183 184 vq->desc_packed[head_idx].flags = head_flags; 185 186 vhost_log_cache_used_vring(dev, vq, 187 head_idx * 188 sizeof(struct vring_packed_desc), 189 sizeof(struct vring_packed_desc)); 190 191 vq->shadow_used_idx = 0; 192 vhost_log_cache_sync(dev, vq); 193 } 194 195 static __rte_always_inline void 196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 197 struct vhost_virtqueue *vq) 198 { 199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 200 201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 202 /* desc flags is the synchronization point for virtio packed vring */ 203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 204 used_elem->flags, __ATOMIC_RELEASE); 205 206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 207 sizeof(struct vring_packed_desc), 208 sizeof(struct vring_packed_desc)); 209 vq->shadow_used_idx = 0; 210 vhost_log_cache_sync(dev, vq); 211 } 212 213 static __rte_always_inline void 214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 215 struct vhost_virtqueue *vq, 216 uint64_t *lens, 217 uint16_t *ids) 218 { 219 uint16_t i; 220 uint16_t flags; 221 uint16_t last_used_idx; 222 struct vring_packed_desc *desc_base; 223 224 last_used_idx = vq->last_used_idx; 225 desc_base = &vq->desc_packed[last_used_idx]; 226 227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 228 229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 230 desc_base[i].id = ids[i]; 231 desc_base[i].len = lens[i]; 232 } 233 234 rte_atomic_thread_fence(__ATOMIC_RELEASE); 235 236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 237 desc_base[i].flags = flags; 238 } 239 240 vhost_log_cache_used_vring(dev, vq, last_used_idx * 241 sizeof(struct vring_packed_desc), 242 sizeof(struct vring_packed_desc) * 243 PACKED_BATCH_SIZE); 244 vhost_log_cache_sync(dev, vq); 245 246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 247 } 248 249 static __rte_always_inline void 250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue 
*vq, 251 uint16_t id) 252 { 253 vq->shadow_used_packed[0].id = id; 254 255 if (!vq->shadow_used_idx) { 256 vq->shadow_last_used_idx = vq->last_used_idx; 257 vq->shadow_used_packed[0].flags = 258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 259 vq->shadow_used_packed[0].len = 0; 260 vq->shadow_used_packed[0].count = 1; 261 vq->shadow_used_idx++; 262 } 263 264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 265 } 266 267 static __rte_always_inline void 268 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 269 struct vhost_virtqueue *vq, 270 uint16_t *ids) 271 { 272 uint16_t flags; 273 uint16_t i; 274 uint16_t begin; 275 276 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 277 278 if (!vq->shadow_used_idx) { 279 vq->shadow_last_used_idx = vq->last_used_idx; 280 vq->shadow_used_packed[0].id = ids[0]; 281 vq->shadow_used_packed[0].len = 0; 282 vq->shadow_used_packed[0].count = 1; 283 vq->shadow_used_packed[0].flags = flags; 284 vq->shadow_used_idx++; 285 begin = 1; 286 } else 287 begin = 0; 288 289 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 290 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 291 vq->desc_packed[vq->last_used_idx + i].len = 0; 292 } 293 294 rte_atomic_thread_fence(__ATOMIC_RELEASE); 295 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 296 vq->desc_packed[vq->last_used_idx + i].flags = flags; 297 298 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 299 sizeof(struct vring_packed_desc), 300 sizeof(struct vring_packed_desc) * 301 PACKED_BATCH_SIZE); 302 vhost_log_cache_sync(dev, vq); 303 304 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 305 } 306 307 static __rte_always_inline void 308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 309 uint16_t buf_id, 310 uint16_t count) 311 { 312 uint16_t flags; 313 314 flags = vq->desc_packed[vq->last_used_idx].flags; 315 if (vq->used_wrap_counter) { 316 flags |= VRING_DESC_F_USED; 317 flags |= VRING_DESC_F_AVAIL; 318 } else { 319 flags &= ~VRING_DESC_F_USED; 320 flags &= ~VRING_DESC_F_AVAIL; 321 } 322 323 if (!vq->shadow_used_idx) { 324 vq->shadow_last_used_idx = vq->last_used_idx; 325 326 vq->shadow_used_packed[0].id = buf_id; 327 vq->shadow_used_packed[0].len = 0; 328 vq->shadow_used_packed[0].flags = flags; 329 vq->shadow_used_idx++; 330 } else { 331 vq->desc_packed[vq->last_used_idx].id = buf_id; 332 vq->desc_packed[vq->last_used_idx].len = 0; 333 vq->desc_packed[vq->last_used_idx].flags = flags; 334 } 335 336 vq_inc_last_used_packed(vq, count); 337 } 338 339 static __rte_always_inline void 340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 341 uint16_t buf_id, 342 uint16_t count) 343 { 344 uint16_t flags; 345 346 vq->shadow_used_packed[0].id = buf_id; 347 348 flags = vq->desc_packed[vq->last_used_idx].flags; 349 if (vq->used_wrap_counter) { 350 flags |= VRING_DESC_F_USED; 351 flags |= VRING_DESC_F_AVAIL; 352 } else { 353 flags &= ~VRING_DESC_F_USED; 354 flags &= ~VRING_DESC_F_AVAIL; 355 } 356 357 if (!vq->shadow_used_idx) { 358 vq->shadow_last_used_idx = vq->last_used_idx; 359 vq->shadow_used_packed[0].len = 0; 360 vq->shadow_used_packed[0].flags = flags; 361 vq->shadow_used_idx++; 362 } 363 364 vq_inc_last_used_packed(vq, count); 365 } 366 367 static __rte_always_inline void 368 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 369 uint32_t *len, 370 uint16_t *id, 371 uint16_t *count, 372 uint16_t num_buffers) 373 { 374 uint16_t i; 375 376 for (i = 0; i < num_buffers; i++) { 377 /* enqueue shadow flush action aligned with batch num 
*/ 378 if (!vq->shadow_used_idx) 379 vq->shadow_aligned_idx = vq->last_used_idx & 380 PACKED_BATCH_MASK; 381 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 382 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 383 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 384 vq->shadow_aligned_idx += count[i]; 385 vq->shadow_used_idx++; 386 } 387 } 388 389 static __rte_always_inline void 390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 391 struct vhost_virtqueue *vq, 392 uint32_t *len, 393 uint16_t *id, 394 uint16_t *count, 395 uint16_t num_buffers) 396 { 397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 398 399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { 400 do_data_copy_enqueue(dev, vq); 401 vhost_flush_enqueue_shadow_packed(dev, vq); 402 } 403 } 404 405 /* avoid write operation when necessary, to lessen cache issues */ 406 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 407 if ((var) != (val)) \ 408 (var) = (val); \ 409 } while (0) 410 411 static __rte_always_inline void 412 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 413 { 414 uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; 415 416 if (m_buf->ol_flags & PKT_TX_TCP_SEG) 417 csum_l4 |= PKT_TX_TCP_CKSUM; 418 419 if (csum_l4) { 420 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 421 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 422 423 switch (csum_l4) { 424 case PKT_TX_TCP_CKSUM: 425 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 426 cksum)); 427 break; 428 case PKT_TX_UDP_CKSUM: 429 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 430 dgram_cksum)); 431 break; 432 case PKT_TX_SCTP_CKSUM: 433 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 434 cksum)); 435 break; 436 } 437 } else { 438 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 439 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 440 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 441 } 442 443 /* IP cksum verification cannot be bypassed, then calculate here */ 444 if (m_buf->ol_flags & PKT_TX_IP_CKSUM) { 445 struct rte_ipv4_hdr *ipv4_hdr; 446 447 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 448 m_buf->l2_len); 449 ipv4_hdr->hdr_checksum = 0; 450 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 451 } 452 453 if (m_buf->ol_flags & PKT_TX_TCP_SEG) { 454 if (m_buf->ol_flags & PKT_TX_IPV4) 455 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 456 else 457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 458 net_hdr->gso_size = m_buf->tso_segsz; 459 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 460 + m_buf->l4_len; 461 } else if (m_buf->ol_flags & PKT_TX_UDP_SEG) { 462 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 463 net_hdr->gso_size = m_buf->tso_segsz; 464 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 465 m_buf->l4_len; 466 } else { 467 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 468 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 469 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 470 } 471 } 472 473 static __rte_always_inline int 474 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 475 struct buf_vector *buf_vec, uint16_t *vec_idx, 476 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 477 { 478 uint16_t vec_id = *vec_idx; 479 480 while (desc_len) { 481 uint64_t desc_addr; 482 uint64_t desc_chunck_len = desc_len; 483 484 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 485 return -1; 486 487 desc_addr = vhost_iova_to_vva(dev, vq, 488 desc_iova, 489 &desc_chunck_len, 490 perm); 491 if (unlikely(!desc_addr)) 492 return -1; 493 494 rte_prefetch0((void 
*)(uintptr_t)desc_addr); 495 496 buf_vec[vec_id].buf_iova = desc_iova; 497 buf_vec[vec_id].buf_addr = desc_addr; 498 buf_vec[vec_id].buf_len = desc_chunck_len; 499 500 desc_len -= desc_chunck_len; 501 desc_iova += desc_chunck_len; 502 vec_id++; 503 } 504 *vec_idx = vec_id; 505 506 return 0; 507 } 508 509 static __rte_always_inline int 510 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 511 uint32_t avail_idx, uint16_t *vec_idx, 512 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 513 uint32_t *desc_chain_len, uint8_t perm) 514 { 515 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 516 uint16_t vec_id = *vec_idx; 517 uint32_t len = 0; 518 uint64_t dlen; 519 uint32_t nr_descs = vq->size; 520 uint32_t cnt = 0; 521 struct vring_desc *descs = vq->desc; 522 struct vring_desc *idesc = NULL; 523 524 if (unlikely(idx >= vq->size)) 525 return -1; 526 527 *desc_chain_head = idx; 528 529 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 530 dlen = vq->desc[idx].len; 531 nr_descs = dlen / sizeof(struct vring_desc); 532 if (unlikely(nr_descs > vq->size)) 533 return -1; 534 535 descs = (struct vring_desc *)(uintptr_t) 536 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 537 &dlen, 538 VHOST_ACCESS_RO); 539 if (unlikely(!descs)) 540 return -1; 541 542 if (unlikely(dlen < vq->desc[idx].len)) { 543 /* 544 * The indirect desc table is not contiguous 545 * in process VA space, we have to copy it. 546 */ 547 idesc = vhost_alloc_copy_ind_table(dev, vq, 548 vq->desc[idx].addr, vq->desc[idx].len); 549 if (unlikely(!idesc)) 550 return -1; 551 552 descs = idesc; 553 } 554 555 idx = 0; 556 } 557 558 while (1) { 559 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 560 free_ind_table(idesc); 561 return -1; 562 } 563 564 dlen = descs[idx].len; 565 len += dlen; 566 567 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 568 descs[idx].addr, dlen, 569 perm))) { 570 free_ind_table(idesc); 571 return -1; 572 } 573 574 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 575 break; 576 577 idx = descs[idx].next; 578 } 579 580 *desc_chain_len = len; 581 *vec_idx = vec_id; 582 583 if (unlikely(!!idesc)) 584 free_ind_table(idesc); 585 586 return 0; 587 } 588 589 /* 590 * Returns -1 on fail, 0 on success 591 */ 592 static inline int 593 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 594 uint32_t size, struct buf_vector *buf_vec, 595 uint16_t *num_buffers, uint16_t avail_head, 596 uint16_t *nr_vec) 597 { 598 uint16_t cur_idx; 599 uint16_t vec_idx = 0; 600 uint16_t max_tries, tries = 0; 601 602 uint16_t head_idx = 0; 603 uint32_t len = 0; 604 605 *num_buffers = 0; 606 cur_idx = vq->last_avail_idx; 607 608 if (rxvq_is_mergeable(dev)) 609 max_tries = vq->size - 1; 610 else 611 max_tries = 1; 612 613 while (size > 0) { 614 if (unlikely(cur_idx == avail_head)) 615 return -1; 616 /* 617 * if we tried all available ring items, and still 618 * can't get enough buf, it means something abnormal 619 * happened. 
620 */ 621 if (unlikely(++tries > max_tries)) 622 return -1; 623 624 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 625 &vec_idx, buf_vec, 626 &head_idx, &len, 627 VHOST_ACCESS_RW) < 0)) 628 return -1; 629 len = RTE_MIN(len, size); 630 update_shadow_used_ring_split(vq, head_idx, len); 631 size -= len; 632 633 cur_idx++; 634 *num_buffers += 1; 635 } 636 637 *nr_vec = vec_idx; 638 639 return 0; 640 } 641 642 static __rte_always_inline int 643 fill_vec_buf_packed_indirect(struct virtio_net *dev, 644 struct vhost_virtqueue *vq, 645 struct vring_packed_desc *desc, uint16_t *vec_idx, 646 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 647 { 648 uint16_t i; 649 uint32_t nr_descs; 650 uint16_t vec_id = *vec_idx; 651 uint64_t dlen; 652 struct vring_packed_desc *descs, *idescs = NULL; 653 654 dlen = desc->len; 655 descs = (struct vring_packed_desc *)(uintptr_t) 656 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 657 if (unlikely(!descs)) 658 return -1; 659 660 if (unlikely(dlen < desc->len)) { 661 /* 662 * The indirect desc table is not contiguous 663 * in process VA space, we have to copy it. 664 */ 665 idescs = vhost_alloc_copy_ind_table(dev, 666 vq, desc->addr, desc->len); 667 if (unlikely(!idescs)) 668 return -1; 669 670 descs = idescs; 671 } 672 673 nr_descs = desc->len / sizeof(struct vring_packed_desc); 674 if (unlikely(nr_descs >= vq->size)) { 675 free_ind_table(idescs); 676 return -1; 677 } 678 679 for (i = 0; i < nr_descs; i++) { 680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 681 free_ind_table(idescs); 682 return -1; 683 } 684 685 dlen = descs[i].len; 686 *len += dlen; 687 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 688 descs[i].addr, dlen, 689 perm))) 690 return -1; 691 } 692 *vec_idx = vec_id; 693 694 if (unlikely(!!idescs)) 695 free_ind_table(idescs); 696 697 return 0; 698 } 699 700 static __rte_always_inline int 701 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 702 uint16_t avail_idx, uint16_t *desc_count, 703 struct buf_vector *buf_vec, uint16_t *vec_idx, 704 uint16_t *buf_id, uint32_t *len, uint8_t perm) 705 { 706 bool wrap_counter = vq->avail_wrap_counter; 707 struct vring_packed_desc *descs = vq->desc_packed; 708 uint16_t vec_id = *vec_idx; 709 uint64_t dlen; 710 711 if (avail_idx < vq->last_avail_idx) 712 wrap_counter ^= 1; 713 714 /* 715 * Perform a load-acquire barrier in desc_is_avail to 716 * enforce the ordering between desc flags and desc 717 * content. 
718 */ 719 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 720 return -1; 721 722 *desc_count = 0; 723 *len = 0; 724 725 while (1) { 726 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 727 return -1; 728 729 if (unlikely(*desc_count >= vq->size)) 730 return -1; 731 732 *desc_count += 1; 733 *buf_id = descs[avail_idx].id; 734 735 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 736 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 737 &descs[avail_idx], 738 &vec_id, buf_vec, 739 len, perm) < 0)) 740 return -1; 741 } else { 742 dlen = descs[avail_idx].len; 743 *len += dlen; 744 745 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 746 descs[avail_idx].addr, 747 dlen, 748 perm))) 749 return -1; 750 } 751 752 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 753 break; 754 755 if (++avail_idx >= vq->size) { 756 avail_idx -= vq->size; 757 wrap_counter ^= 1; 758 } 759 } 760 761 *vec_idx = vec_id; 762 763 return 0; 764 } 765 766 static __rte_noinline void 767 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 768 struct buf_vector *buf_vec, 769 struct virtio_net_hdr_mrg_rxbuf *hdr) 770 { 771 uint64_t len; 772 uint64_t remain = dev->vhost_hlen; 773 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 774 uint64_t iova = buf_vec->buf_iova; 775 776 while (remain) { 777 len = RTE_MIN(remain, 778 buf_vec->buf_len); 779 dst = buf_vec->buf_addr; 780 rte_memcpy((void *)(uintptr_t)dst, 781 (void *)(uintptr_t)src, 782 len); 783 784 PRINT_PACKET(dev, (uintptr_t)dst, 785 (uint32_t)len, 0); 786 vhost_log_cache_write_iova(dev, vq, 787 iova, len); 788 789 remain -= len; 790 iova += len; 791 src += len; 792 buf_vec++; 793 } 794 } 795 796 static __rte_always_inline int 797 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 798 struct rte_mbuf *m, struct buf_vector *buf_vec, 799 uint16_t nr_vec, uint16_t num_buffers) 800 { 801 uint32_t vec_idx = 0; 802 uint32_t mbuf_offset, mbuf_avail; 803 uint32_t buf_offset, buf_avail; 804 uint64_t buf_addr, buf_iova, buf_len; 805 uint32_t cpy_len; 806 uint64_t hdr_addr; 807 struct rte_mbuf *hdr_mbuf; 808 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 809 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 810 int error = 0; 811 812 if (unlikely(m == NULL)) { 813 error = -1; 814 goto out; 815 } 816 817 buf_addr = buf_vec[vec_idx].buf_addr; 818 buf_iova = buf_vec[vec_idx].buf_iova; 819 buf_len = buf_vec[vec_idx].buf_len; 820 821 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 822 error = -1; 823 goto out; 824 } 825 826 hdr_mbuf = m; 827 hdr_addr = buf_addr; 828 if (unlikely(buf_len < dev->vhost_hlen)) { 829 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 830 hdr = &tmp_hdr; 831 } else 832 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 833 834 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 835 dev->vid, num_buffers); 836 837 if (unlikely(buf_len < dev->vhost_hlen)) { 838 buf_offset = dev->vhost_hlen - buf_len; 839 vec_idx++; 840 buf_addr = buf_vec[vec_idx].buf_addr; 841 buf_iova = buf_vec[vec_idx].buf_iova; 842 buf_len = buf_vec[vec_idx].buf_len; 843 buf_avail = buf_len - buf_offset; 844 } else { 845 buf_offset = dev->vhost_hlen; 846 buf_avail = buf_len - dev->vhost_hlen; 847 } 848 849 mbuf_avail = rte_pktmbuf_data_len(m); 850 mbuf_offset = 0; 851 while (mbuf_avail != 0 || m->next != NULL) { 852 /* done with current buf, get the next one */ 853 if (buf_avail == 0) { 854 vec_idx++; 855 if (unlikely(vec_idx >= nr_vec)) { 856 error = -1; 857 goto out; 858 } 859 860 
buf_addr = buf_vec[vec_idx].buf_addr; 861 buf_iova = buf_vec[vec_idx].buf_iova; 862 buf_len = buf_vec[vec_idx].buf_len; 863 864 buf_offset = 0; 865 buf_avail = buf_len; 866 } 867 868 /* done with current mbuf, get the next one */ 869 if (mbuf_avail == 0) { 870 m = m->next; 871 872 mbuf_offset = 0; 873 mbuf_avail = rte_pktmbuf_data_len(m); 874 } 875 876 if (hdr_addr) { 877 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 878 if (rxvq_is_mergeable(dev)) 879 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 880 num_buffers); 881 882 if (unlikely(hdr == &tmp_hdr)) { 883 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 884 } else { 885 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 886 dev->vhost_hlen, 0); 887 vhost_log_cache_write_iova(dev, vq, 888 buf_vec[0].buf_iova, 889 dev->vhost_hlen); 890 } 891 892 hdr_addr = 0; 893 } 894 895 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 896 897 if (likely(cpy_len > MAX_BATCH_LEN || 898 vq->batch_copy_nb_elems >= vq->size)) { 899 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), 900 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 901 cpy_len); 902 vhost_log_cache_write_iova(dev, vq, 903 buf_iova + buf_offset, 904 cpy_len); 905 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), 906 cpy_len, 0); 907 } else { 908 batch_copy[vq->batch_copy_nb_elems].dst = 909 (void *)((uintptr_t)(buf_addr + buf_offset)); 910 batch_copy[vq->batch_copy_nb_elems].src = 911 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 912 batch_copy[vq->batch_copy_nb_elems].log_addr = 913 buf_iova + buf_offset; 914 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 915 vq->batch_copy_nb_elems++; 916 } 917 918 mbuf_avail -= cpy_len; 919 mbuf_offset += cpy_len; 920 buf_avail -= cpy_len; 921 buf_offset += cpy_len; 922 } 923 924 out: 925 926 return error; 927 } 928 929 static __rte_always_inline void 930 async_fill_vec(struct iovec *v, void *base, size_t len) 931 { 932 v->iov_base = base; 933 v->iov_len = len; 934 } 935 936 static __rte_always_inline void 937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count, 938 struct iovec *vec, unsigned long nr_seg) 939 { 940 it->offset = 0; 941 it->count = count; 942 943 if (count) { 944 it->iov = vec; 945 it->nr_segs = nr_seg; 946 } else { 947 it->iov = 0; 948 it->nr_segs = 0; 949 } 950 } 951 952 static __rte_always_inline void 953 async_fill_desc(struct rte_vhost_async_desc *desc, 954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst) 955 { 956 desc->src = src; 957 desc->dst = dst; 958 } 959 960 static __rte_always_inline int 961 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 962 struct rte_mbuf *m, struct buf_vector *buf_vec, 963 uint16_t nr_vec, uint16_t num_buffers, 964 struct iovec *src_iovec, struct iovec *dst_iovec, 965 struct rte_vhost_iov_iter *src_it, 966 struct rte_vhost_iov_iter *dst_it) 967 { 968 uint32_t vec_idx = 0; 969 uint32_t mbuf_offset, mbuf_avail; 970 uint32_t buf_offset, buf_avail; 971 uint64_t buf_addr, buf_iova, buf_len; 972 uint32_t cpy_len, cpy_threshold; 973 uint64_t hdr_addr; 974 struct rte_mbuf *hdr_mbuf; 975 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 976 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 977 int error = 0; 978 uint64_t mapped_len; 979 980 uint32_t tlen = 0; 981 int tvec_idx = 0; 982 void *hpa; 983 984 if (unlikely(m == NULL)) { 985 error = -1; 986 goto out; 987 } 988 989 cpy_threshold = vq->async_threshold; 990 991 buf_addr = buf_vec[vec_idx].buf_addr; 992 buf_iova = buf_vec[vec_idx].buf_iova; 993 buf_len = buf_vec[vec_idx].buf_len; 994 995 if (unlikely(buf_len < 
dev->vhost_hlen && nr_vec <= 1)) { 996 error = -1; 997 goto out; 998 } 999 1000 hdr_mbuf = m; 1001 hdr_addr = buf_addr; 1002 if (unlikely(buf_len < dev->vhost_hlen)) { 1003 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1004 hdr = &tmp_hdr; 1005 } else 1006 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1007 1008 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 1009 dev->vid, num_buffers); 1010 1011 if (unlikely(buf_len < dev->vhost_hlen)) { 1012 buf_offset = dev->vhost_hlen - buf_len; 1013 vec_idx++; 1014 buf_addr = buf_vec[vec_idx].buf_addr; 1015 buf_iova = buf_vec[vec_idx].buf_iova; 1016 buf_len = buf_vec[vec_idx].buf_len; 1017 buf_avail = buf_len - buf_offset; 1018 } else { 1019 buf_offset = dev->vhost_hlen; 1020 buf_avail = buf_len - dev->vhost_hlen; 1021 } 1022 1023 mbuf_avail = rte_pktmbuf_data_len(m); 1024 mbuf_offset = 0; 1025 1026 while (mbuf_avail != 0 || m->next != NULL) { 1027 /* done with current buf, get the next one */ 1028 if (buf_avail == 0) { 1029 vec_idx++; 1030 if (unlikely(vec_idx >= nr_vec)) { 1031 error = -1; 1032 goto out; 1033 } 1034 1035 buf_addr = buf_vec[vec_idx].buf_addr; 1036 buf_iova = buf_vec[vec_idx].buf_iova; 1037 buf_len = buf_vec[vec_idx].buf_len; 1038 1039 buf_offset = 0; 1040 buf_avail = buf_len; 1041 } 1042 1043 /* done with current mbuf, get the next one */ 1044 if (mbuf_avail == 0) { 1045 m = m->next; 1046 1047 mbuf_offset = 0; 1048 mbuf_avail = rte_pktmbuf_data_len(m); 1049 } 1050 1051 if (hdr_addr) { 1052 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1053 if (rxvq_is_mergeable(dev)) 1054 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1055 num_buffers); 1056 1057 if (unlikely(hdr == &tmp_hdr)) { 1058 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1059 } else { 1060 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1061 dev->vhost_hlen, 0); 1062 vhost_log_cache_write_iova(dev, vq, 1063 buf_vec[0].buf_iova, 1064 dev->vhost_hlen); 1065 } 1066 1067 hdr_addr = 0; 1068 } 1069 1070 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1071 1072 while (unlikely(cpy_len && cpy_len >= cpy_threshold)) { 1073 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1074 buf_iova + buf_offset, 1075 cpy_len, &mapped_len); 1076 1077 if (unlikely(!hpa || mapped_len < cpy_threshold)) 1078 break; 1079 1080 async_fill_vec(src_iovec + tvec_idx, 1081 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 1082 mbuf_offset), (size_t)mapped_len); 1083 1084 async_fill_vec(dst_iovec + tvec_idx, 1085 hpa, (size_t)mapped_len); 1086 1087 tlen += (uint32_t)mapped_len; 1088 cpy_len -= (uint32_t)mapped_len; 1089 mbuf_avail -= (uint32_t)mapped_len; 1090 mbuf_offset += (uint32_t)mapped_len; 1091 buf_avail -= (uint32_t)mapped_len; 1092 buf_offset += (uint32_t)mapped_len; 1093 tvec_idx++; 1094 } 1095 1096 if (likely(cpy_len)) { 1097 if (unlikely(vq->batch_copy_nb_elems >= vq->size)) { 1098 rte_memcpy( 1099 (void *)((uintptr_t)(buf_addr + buf_offset)), 1100 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1101 cpy_len); 1102 1103 PRINT_PACKET(dev, 1104 (uintptr_t)(buf_addr + buf_offset), 1105 cpy_len, 0); 1106 } else { 1107 batch_copy[vq->batch_copy_nb_elems].dst = 1108 (void *)((uintptr_t)(buf_addr + buf_offset)); 1109 batch_copy[vq->batch_copy_nb_elems].src = 1110 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1111 batch_copy[vq->batch_copy_nb_elems].log_addr = 1112 buf_iova + buf_offset; 1113 batch_copy[vq->batch_copy_nb_elems].len = 1114 cpy_len; 1115 vq->batch_copy_nb_elems++; 1116 } 1117 1118 mbuf_avail -= cpy_len; 1119 mbuf_offset += cpy_len; 1120 buf_avail -= cpy_len; 1121 buf_offset += 
cpy_len; 1122 } 1123 1124 } 1125 1126 out: 1127 if (tlen) { 1128 async_fill_iter(src_it, tlen, src_iovec, tvec_idx); 1129 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx); 1130 } else { 1131 src_it->count = 0; 1132 } 1133 1134 return error; 1135 } 1136 1137 static __rte_always_inline int 1138 vhost_enqueue_single_packed(struct virtio_net *dev, 1139 struct vhost_virtqueue *vq, 1140 struct rte_mbuf *pkt, 1141 struct buf_vector *buf_vec, 1142 uint16_t *nr_descs) 1143 { 1144 uint16_t nr_vec = 0; 1145 uint16_t avail_idx = vq->last_avail_idx; 1146 uint16_t max_tries, tries = 0; 1147 uint16_t buf_id = 0; 1148 uint32_t len = 0; 1149 uint16_t desc_count; 1150 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1151 uint16_t num_buffers = 0; 1152 uint32_t buffer_len[vq->size]; 1153 uint16_t buffer_buf_id[vq->size]; 1154 uint16_t buffer_desc_count[vq->size]; 1155 1156 if (rxvq_is_mergeable(dev)) 1157 max_tries = vq->size - 1; 1158 else 1159 max_tries = 1; 1160 1161 while (size > 0) { 1162 /* 1163 * if we tried all available ring items, and still 1164 * can't get enough buf, it means something abnormal 1165 * happened. 1166 */ 1167 if (unlikely(++tries > max_tries)) 1168 return -1; 1169 1170 if (unlikely(fill_vec_buf_packed(dev, vq, 1171 avail_idx, &desc_count, 1172 buf_vec, &nr_vec, 1173 &buf_id, &len, 1174 VHOST_ACCESS_RW) < 0)) 1175 return -1; 1176 1177 len = RTE_MIN(len, size); 1178 size -= len; 1179 1180 buffer_len[num_buffers] = len; 1181 buffer_buf_id[num_buffers] = buf_id; 1182 buffer_desc_count[num_buffers] = desc_count; 1183 num_buffers += 1; 1184 1185 *nr_descs += desc_count; 1186 avail_idx += desc_count; 1187 if (avail_idx >= vq->size) 1188 avail_idx -= vq->size; 1189 } 1190 1191 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0) 1192 return -1; 1193 1194 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1195 buffer_desc_count, num_buffers); 1196 1197 return 0; 1198 } 1199 1200 static __rte_noinline uint32_t 1201 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1202 struct rte_mbuf **pkts, uint32_t count) 1203 { 1204 uint32_t pkt_idx = 0; 1205 uint16_t num_buffers; 1206 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1207 uint16_t avail_head; 1208 1209 /* 1210 * The ordering between avail index and 1211 * desc reads needs to be enforced. 
1212 */ 1213 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1214 1215 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1216 1217 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1218 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1219 uint16_t nr_vec = 0; 1220 1221 if (unlikely(reserve_avail_buf_split(dev, vq, 1222 pkt_len, buf_vec, &num_buffers, 1223 avail_head, &nr_vec) < 0)) { 1224 VHOST_LOG_DATA(DEBUG, 1225 "(%d) failed to get enough desc from vring\n", 1226 dev->vid); 1227 vq->shadow_used_idx -= num_buffers; 1228 break; 1229 } 1230 1231 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1232 dev->vid, vq->last_avail_idx, 1233 vq->last_avail_idx + num_buffers); 1234 1235 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], 1236 buf_vec, nr_vec, 1237 num_buffers) < 0) { 1238 vq->shadow_used_idx -= num_buffers; 1239 break; 1240 } 1241 1242 vq->last_avail_idx += num_buffers; 1243 } 1244 1245 do_data_copy_enqueue(dev, vq); 1246 1247 if (likely(vq->shadow_used_idx)) { 1248 flush_shadow_used_ring_split(dev, vq); 1249 vhost_vring_call_split(dev, vq); 1250 } 1251 1252 return pkt_idx; 1253 } 1254 1255 static __rte_always_inline int 1256 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1257 struct vhost_virtqueue *vq, 1258 struct rte_mbuf **pkts, 1259 uint64_t *desc_addrs, 1260 uint64_t *lens) 1261 { 1262 bool wrap_counter = vq->avail_wrap_counter; 1263 struct vring_packed_desc *descs = vq->desc_packed; 1264 uint16_t avail_idx = vq->last_avail_idx; 1265 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1266 uint16_t i; 1267 1268 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1269 return -1; 1270 1271 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1272 return -1; 1273 1274 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1275 if (unlikely(pkts[i]->next != NULL)) 1276 return -1; 1277 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1278 wrap_counter))) 1279 return -1; 1280 } 1281 1282 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1283 lens[i] = descs[avail_idx + i].len; 1284 1285 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1286 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1287 return -1; 1288 } 1289 1290 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1291 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1292 descs[avail_idx + i].addr, 1293 &lens[i], 1294 VHOST_ACCESS_RW); 1295 1296 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1297 if (unlikely(!desc_addrs[i])) 1298 return -1; 1299 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1300 return -1; 1301 } 1302 1303 return 0; 1304 } 1305 1306 static __rte_always_inline int 1307 virtio_dev_rx_async_batch_check(struct virtio_net *dev, 1308 struct vhost_virtqueue *vq, 1309 struct rte_mbuf **pkts, 1310 uint64_t *desc_addrs, 1311 uint64_t *lens) 1312 { 1313 bool wrap_counter = vq->avail_wrap_counter; 1314 struct vring_packed_desc *descs = vq->desc_packed; 1315 uint16_t avail_idx = vq->last_avail_idx; 1316 uint16_t used_idx = vq->last_used_idx; 1317 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1318 uint32_t cpy_threshold = vq->async_threshold; 1319 uint16_t i; 1320 1321 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1322 if (unlikely(pkts[i]->data_len >= cpy_threshold)) 1323 return -1; 1324 } 1325 1326 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1327 return -1; 1328 1329 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1330 return -1; 1331 1332 if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size)) 
1333 return -1; 1334 1335 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1336 if (unlikely(pkts[i]->next != NULL)) 1337 return -1; 1338 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1339 wrap_counter))) 1340 return -1; 1341 } 1342 1343 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1344 lens[i] = descs[avail_idx + i].len; 1345 1346 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1347 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1348 return -1; 1349 } 1350 1351 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1352 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1353 descs[avail_idx + i].addr, 1354 &lens[i], 1355 VHOST_ACCESS_RW); 1356 1357 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1358 if (unlikely(!desc_addrs[i])) 1359 return -1; 1360 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1361 return -1; 1362 } 1363 1364 return 0; 1365 } 1366 1367 static __rte_always_inline void 1368 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1369 struct vhost_virtqueue *vq, 1370 struct rte_mbuf **pkts, 1371 uint64_t *desc_addrs, 1372 uint64_t *lens) 1373 { 1374 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1375 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1376 struct vring_packed_desc *descs = vq->desc_packed; 1377 uint16_t avail_idx = vq->last_avail_idx; 1378 uint16_t ids[PACKED_BATCH_SIZE]; 1379 uint16_t i; 1380 1381 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1382 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1383 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1384 (uintptr_t)desc_addrs[i]; 1385 lens[i] = pkts[i]->pkt_len + 1386 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1387 } 1388 1389 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1390 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1391 1392 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1393 1394 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1395 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1396 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1397 pkts[i]->pkt_len); 1398 } 1399 1400 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1401 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1402 lens[i]); 1403 1404 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1405 ids[i] = descs[avail_idx + i].id; 1406 1407 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1408 } 1409 1410 static __rte_always_inline int 1411 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1412 struct vhost_virtqueue *vq, 1413 struct rte_mbuf **pkts) 1414 { 1415 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1416 uint64_t lens[PACKED_BATCH_SIZE]; 1417 1418 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1419 return -1; 1420 1421 if (vq->shadow_used_idx) { 1422 do_data_copy_enqueue(dev, vq); 1423 vhost_flush_enqueue_shadow_packed(dev, vq); 1424 } 1425 1426 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1427 1428 return 0; 1429 } 1430 1431 static __rte_always_inline int 1432 virtio_dev_rx_async_batch_packed(struct virtio_net *dev, 1433 struct vhost_virtqueue *vq, 1434 struct rte_mbuf **pkts, 1435 struct rte_mbuf **comp_pkts, uint32_t *pkt_done) 1436 { 1437 uint16_t i; 1438 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1439 uint64_t lens[PACKED_BATCH_SIZE]; 1440 1441 if (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1442 return -1; 1443 1444 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1445 1446 if (vq->shadow_used_idx) { 1447 do_data_copy_enqueue(dev, vq); 1448 
vhost_flush_enqueue_shadow_packed(dev, vq); 1449 } 1450 1451 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1452 comp_pkts[(*pkt_done)++] = pkts[i]; 1453 1454 return 0; 1455 } 1456 1457 static __rte_always_inline int16_t 1458 virtio_dev_rx_single_packed(struct virtio_net *dev, 1459 struct vhost_virtqueue *vq, 1460 struct rte_mbuf *pkt) 1461 { 1462 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1463 uint16_t nr_descs = 0; 1464 1465 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1466 &nr_descs) < 0)) { 1467 VHOST_LOG_DATA(DEBUG, 1468 "(%d) failed to get enough desc from vring\n", 1469 dev->vid); 1470 return -1; 1471 } 1472 1473 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1474 dev->vid, vq->last_avail_idx, 1475 vq->last_avail_idx + nr_descs); 1476 1477 vq_inc_last_avail_packed(vq, nr_descs); 1478 1479 return 0; 1480 } 1481 1482 static __rte_noinline uint32_t 1483 virtio_dev_rx_packed(struct virtio_net *dev, 1484 struct vhost_virtqueue *__rte_restrict vq, 1485 struct rte_mbuf **__rte_restrict pkts, 1486 uint32_t count) 1487 { 1488 uint32_t pkt_idx = 0; 1489 1490 do { 1491 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1492 1493 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1494 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1495 &pkts[pkt_idx])) { 1496 pkt_idx += PACKED_BATCH_SIZE; 1497 continue; 1498 } 1499 } 1500 1501 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1502 break; 1503 pkt_idx++; 1504 1505 } while (pkt_idx < count); 1506 1507 if (vq->shadow_used_idx) { 1508 do_data_copy_enqueue(dev, vq); 1509 vhost_flush_enqueue_shadow_packed(dev, vq); 1510 } 1511 1512 if (pkt_idx) 1513 vhost_vring_call_packed(dev, vq); 1514 1515 return pkt_idx; 1516 } 1517 1518 static __rte_always_inline uint32_t 1519 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1520 struct rte_mbuf **pkts, uint32_t count) 1521 { 1522 struct vhost_virtqueue *vq; 1523 uint32_t nb_tx = 0; 1524 1525 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 1526 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1527 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 1528 dev->vid, __func__, queue_id); 1529 return 0; 1530 } 1531 1532 vq = dev->virtqueue[queue_id]; 1533 1534 rte_spinlock_lock(&vq->access_lock); 1535 1536 if (unlikely(!vq->enabled)) 1537 goto out_access_unlock; 1538 1539 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1540 vhost_user_iotlb_rd_lock(vq); 1541 1542 if (unlikely(!vq->access_ok)) 1543 if (unlikely(vring_translate(dev, vq) < 0)) 1544 goto out; 1545 1546 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1547 if (count == 0) 1548 goto out; 1549 1550 if (vq_is_packed(dev)) 1551 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1552 else 1553 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1554 1555 out: 1556 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1557 vhost_user_iotlb_rd_unlock(vq); 1558 1559 out_access_unlock: 1560 rte_spinlock_unlock(&vq->access_lock); 1561 1562 return nb_tx; 1563 } 1564 1565 uint16_t 1566 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1567 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1568 { 1569 struct virtio_net *dev = get_device(vid); 1570 1571 if (!dev) 1572 return 0; 1573 1574 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1575 VHOST_LOG_DATA(ERR, 1576 "(%d) %s: built-in vhost net backend is disabled.\n", 1577 dev->vid, __func__); 1578 return 0; 1579 } 1580 1581 return virtio_dev_rx(dev, queue_id, pkts, count); 1582 } 1583 1584 static 
__rte_always_inline uint16_t 1585 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx, 1586 uint16_t vq_size, uint16_t n_inflight) 1587 { 1588 return pkts_idx > n_inflight ? (pkts_idx - n_inflight) : 1589 (vq_size - n_inflight + pkts_idx) % vq_size; 1590 } 1591 1592 static __rte_always_inline void 1593 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1594 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1595 { 1596 size_t elem_size = sizeof(struct vring_used_elem); 1597 1598 if (d_idx + count <= ring_size) { 1599 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1600 } else { 1601 uint16_t size = ring_size - d_idx; 1602 1603 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1604 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1605 } 1606 } 1607 1608 static __rte_always_inline void 1609 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1610 struct vring_used_elem_packed *d_ring, 1611 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1612 { 1613 size_t elem_size = sizeof(struct vring_used_elem_packed); 1614 1615 if (d_idx + count <= ring_size) { 1616 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1617 } else { 1618 uint16_t size = ring_size - d_idx; 1619 1620 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1621 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1622 } 1623 } 1624 1625 static __rte_noinline uint32_t 1626 virtio_dev_rx_async_submit_split(struct virtio_net *dev, 1627 struct vhost_virtqueue *vq, uint16_t queue_id, 1628 struct rte_mbuf **pkts, uint32_t count, 1629 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 1630 { 1631 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1632 uint16_t num_buffers; 1633 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1634 uint16_t avail_head; 1635 1636 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1637 struct iovec *vec_pool = vq->vec_pool; 1638 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1639 struct iovec *src_iovec = vec_pool; 1640 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1641 uint16_t slot_idx = 0; 1642 uint16_t segs_await = 0; 1643 uint16_t iovec_idx = 0, it_idx = 0; 1644 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1645 uint32_t n_pkts = 0, pkt_err = 0; 1646 uint32_t num_async_pkts = 0, num_done_pkts = 0; 1647 int32_t n_xfer; 1648 struct { 1649 uint16_t pkt_idx; 1650 uint16_t last_avail_idx; 1651 } async_pkts_log[MAX_PKT_BURST]; 1652 1653 /* 1654 * The ordering between avail index and desc reads need to be enforced. 
1655 */ 1656 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1657 1658 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1659 1660 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1661 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1662 uint16_t nr_vec = 0; 1663 1664 if (unlikely(reserve_avail_buf_split(dev, vq, 1665 pkt_len, buf_vec, &num_buffers, 1666 avail_head, &nr_vec) < 0)) { 1667 VHOST_LOG_DATA(DEBUG, 1668 "(%d) failed to get enough desc from vring\n", 1669 dev->vid); 1670 vq->shadow_used_idx -= num_buffers; 1671 break; 1672 } 1673 1674 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1675 dev->vid, vq->last_avail_idx, 1676 vq->last_avail_idx + num_buffers); 1677 1678 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, 1679 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1680 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) { 1681 vq->shadow_used_idx -= num_buffers; 1682 break; 1683 } 1684 1685 slot_idx = (vq->async_pkts_idx + num_async_pkts) & 1686 (vq->size - 1); 1687 if (it_pool[it_idx].count) { 1688 uint16_t from, to; 1689 1690 async_fill_desc(&tdes[pkt_burst_idx++], 1691 &it_pool[it_idx], &it_pool[it_idx + 1]); 1692 pkts_info[slot_idx].descs = num_buffers; 1693 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1694 async_pkts_log[num_async_pkts].pkt_idx = pkt_idx; 1695 async_pkts_log[num_async_pkts++].last_avail_idx = 1696 vq->last_avail_idx; 1697 1698 iovec_idx += it_pool[it_idx].nr_segs; 1699 it_idx += 2; 1700 1701 segs_await += it_pool[it_idx].nr_segs; 1702 1703 /** 1704 * recover shadow used ring and keep DMA-occupied 1705 * descriptors. 1706 */ 1707 from = vq->shadow_used_idx - num_buffers; 1708 to = vq->async_desc_idx_split & (vq->size - 1); 1709 1710 store_dma_desc_info_split(vq->shadow_used_split, 1711 vq->async_descs_split, vq->size, from, to, num_buffers); 1712 1713 vq->async_desc_idx_split += num_buffers; 1714 vq->shadow_used_idx -= num_buffers; 1715 } else 1716 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 1717 1718 vq->last_avail_idx += num_buffers; 1719 1720 /* 1721 * conditions to trigger async device transfer: 1722 * - buffered packet number reaches transfer threshold 1723 * - unused async iov number is less than max vhost vector 1724 */ 1725 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1726 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < 1727 BUF_VECTOR_MAX))) { 1728 n_xfer = vq->async_ops.transfer_data(dev->vid, 1729 queue_id, tdes, 0, pkt_burst_idx); 1730 if (n_xfer >= 0) { 1731 n_pkts = n_xfer; 1732 } else { 1733 VHOST_LOG_DATA(ERR, 1734 "(%d) %s: failed to transfer data for queue id %d.\n", 1735 dev->vid, __func__, queue_id); 1736 n_pkts = 0; 1737 } 1738 1739 iovec_idx = 0; 1740 it_idx = 0; 1741 1742 segs_await = 0; 1743 vq->async_pkts_inflight_n += n_pkts; 1744 1745 if (unlikely(n_pkts < pkt_burst_idx)) { 1746 /* 1747 * log error packets number here and do actual 1748 * error processing when applications poll 1749 * completion 1750 */ 1751 pkt_err = pkt_burst_idx - n_pkts; 1752 pkt_burst_idx = 0; 1753 break; 1754 } 1755 1756 pkt_burst_idx = 0; 1757 } 1758 } 1759 1760 if (pkt_burst_idx) { 1761 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 1762 if (n_xfer >= 0) { 1763 n_pkts = n_xfer; 1764 } else { 1765 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n", 1766 dev->vid, __func__, queue_id); 1767 n_pkts = 0; 1768 } 1769 1770 vq->async_pkts_inflight_n += n_pkts; 1771 1772 if (unlikely(n_pkts < pkt_burst_idx)) 1773 
pkt_err = pkt_burst_idx - n_pkts; 1774 } 1775 1776 do_data_copy_enqueue(dev, vq); 1777 1778 if (unlikely(pkt_err)) { 1779 uint16_t num_descs = 0; 1780 1781 num_async_pkts -= pkt_err; 1782 /* calculate the sum of descriptors of DMA-error packets. */ 1783 while (pkt_err-- > 0) { 1784 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1785 slot_idx--; 1786 } 1787 vq->async_desc_idx_split -= num_descs; 1788 /* recover shadow used ring and available ring */ 1789 vq->shadow_used_idx -= (vq->last_avail_idx - 1790 async_pkts_log[num_async_pkts].last_avail_idx - 1791 num_descs); 1792 vq->last_avail_idx = 1793 async_pkts_log[num_async_pkts].last_avail_idx; 1794 pkt_idx = async_pkts_log[num_async_pkts].pkt_idx; 1795 num_done_pkts = pkt_idx - num_async_pkts; 1796 } 1797 1798 vq->async_pkts_idx += num_async_pkts; 1799 *comp_count = num_done_pkts; 1800 1801 if (likely(vq->shadow_used_idx)) { 1802 flush_shadow_used_ring_split(dev, vq); 1803 vhost_vring_call_split(dev, vq); 1804 } 1805 1806 return pkt_idx; 1807 } 1808 1809 static __rte_always_inline void 1810 vhost_update_used_packed(struct vhost_virtqueue *vq, 1811 struct vring_used_elem_packed *shadow_ring, 1812 uint16_t count) 1813 { 1814 int i; 1815 uint16_t used_idx = vq->last_used_idx; 1816 uint16_t head_idx = vq->last_used_idx; 1817 uint16_t head_flags = 0; 1818 1819 if (count == 0) 1820 return; 1821 1822 /* Split loop in two to save memory barriers */ 1823 for (i = 0; i < count; i++) { 1824 vq->desc_packed[used_idx].id = shadow_ring[i].id; 1825 vq->desc_packed[used_idx].len = shadow_ring[i].len; 1826 1827 used_idx += shadow_ring[i].count; 1828 if (used_idx >= vq->size) 1829 used_idx -= vq->size; 1830 } 1831 1832 /* The ordering for storing desc flags needs to be enforced. */ 1833 rte_atomic_thread_fence(__ATOMIC_RELEASE); 1834 1835 for (i = 0; i < count; i++) { 1836 uint16_t flags; 1837 1838 if (vq->shadow_used_packed[i].len) 1839 flags = VRING_DESC_F_WRITE; 1840 else 1841 flags = 0; 1842 1843 if (vq->used_wrap_counter) { 1844 flags |= VRING_DESC_F_USED; 1845 flags |= VRING_DESC_F_AVAIL; 1846 } else { 1847 flags &= ~VRING_DESC_F_USED; 1848 flags &= ~VRING_DESC_F_AVAIL; 1849 } 1850 1851 if (i > 0) { 1852 vq->desc_packed[vq->last_used_idx].flags = flags; 1853 } else { 1854 head_idx = vq->last_used_idx; 1855 head_flags = flags; 1856 } 1857 1858 vq_inc_last_used_packed(vq, shadow_ring[i].count); 1859 } 1860 1861 vq->desc_packed[head_idx].flags = head_flags; 1862 } 1863 1864 static __rte_always_inline int 1865 vhost_enqueue_async_single_packed(struct virtio_net *dev, 1866 struct vhost_virtqueue *vq, 1867 struct rte_mbuf *pkt, 1868 struct buf_vector *buf_vec, 1869 uint16_t *nr_descs, 1870 uint16_t *nr_buffers, 1871 struct vring_packed_desc *async_descs, 1872 struct iovec *src_iovec, struct iovec *dst_iovec, 1873 struct rte_vhost_iov_iter *src_it, 1874 struct rte_vhost_iov_iter *dst_it) 1875 { 1876 uint16_t nr_vec = 0; 1877 uint16_t avail_idx = vq->last_avail_idx; 1878 uint16_t max_tries, tries = 0; 1879 uint16_t buf_id = 0; 1880 uint32_t len = 0; 1881 uint16_t desc_count = 0; 1882 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1883 uint32_t buffer_len[vq->size]; 1884 uint16_t buffer_buf_id[vq->size]; 1885 uint16_t buffer_desc_count[vq->size]; 1886 1887 if (rxvq_is_mergeable(dev)) 1888 max_tries = vq->size - 1; 1889 else 1890 max_tries = 1; 1891 1892 while (size > 0) { 1893 /* 1894 * if we tried all available ring items, and still 1895 * can't get enough buf, it means something abnormal 1896 * happened. 
1897 */ 1898 if (unlikely(++tries > max_tries)) 1899 return -1; 1900 1901 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec, 1902 &buf_id, &len, VHOST_ACCESS_RW) < 0)) 1903 return -1; 1904 1905 len = RTE_MIN(len, size); 1906 size -= len; 1907 1908 buffer_len[*nr_buffers] = len; 1909 buffer_buf_id[*nr_buffers] = buf_id; 1910 buffer_desc_count[*nr_buffers] = desc_count; 1911 *nr_buffers += 1; 1912 1913 *nr_descs += desc_count; 1914 avail_idx += desc_count; 1915 if (avail_idx >= vq->size) 1916 avail_idx -= vq->size; 1917 } 1918 1919 if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec, 1920 src_it, dst_it) < 0) 1921 return -1; 1922 /* store descriptors for DMA */ 1923 if (avail_idx >= *nr_descs) { 1924 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1925 *nr_descs * sizeof(struct vring_packed_desc)); 1926 } else { 1927 uint16_t nr_copy = vq->size - vq->last_avail_idx; 1928 1929 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1930 nr_copy * sizeof(struct vring_packed_desc)); 1931 rte_memcpy(async_descs + nr_copy, vq->desc_packed, 1932 (*nr_descs - nr_copy) * sizeof(struct vring_packed_desc)); 1933 } 1934 1935 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1936 1937 return 0; 1938 } 1939 1940 static __rte_always_inline int16_t 1941 virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1942 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers, 1943 struct vring_packed_desc *async_descs, 1944 struct iovec *src_iovec, struct iovec *dst_iovec, 1945 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it) 1946 { 1947 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1948 1949 if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers, 1950 async_descs, src_iovec, dst_iovec, 1951 src_it, dst_it) < 0)) { 1952 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); 1953 return -1; 1954 } 1955 1956 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1957 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1958 1959 return 0; 1960 } 1961 1962 static __rte_always_inline void 1963 dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs, 1964 uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err, 1965 uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts) 1966 { 1967 uint16_t descs_err = 0; 1968 uint16_t buffers_err = 0; 1969 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1970 1971 *num_async_pkts -= nr_err; 1972 *pkt_idx -= nr_err; 1973 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1974 while (nr_err-- > 0) { 1975 descs_err += pkts_info[slot_idx % vq->size].descs; 1976 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1977 slot_idx--; 1978 } 1979 1980 vq->async_buffer_idx_packed -= buffers_err; 1981 1982 if (vq->last_avail_idx >= descs_err) { 1983 vq->last_avail_idx -= descs_err; 1984 1985 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1986 &async_descs[async_descs_idx - descs_err], 1987 descs_err * sizeof(struct vring_packed_desc)); 1988 } else { 1989 uint16_t nr_copy; 1990 1991 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1992 nr_copy = vq->size - vq->last_avail_idx; 1993 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1994 &async_descs[async_descs_idx - descs_err], 1995 nr_copy * sizeof(struct vring_packed_desc)); 1996 descs_err -= nr_copy; 1997 rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err], 1998 descs_err * sizeof(struct vring_packed_desc)); 1999 vq->avail_wrap_counter ^= 1; 2000 } 2001 2002 *num_done_pkts = *pkt_idx - *num_async_pkts; 2003 } 2004 2005 static __rte_noinline uint32_t 2006 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, 2007 struct vhost_virtqueue *vq, uint16_t queue_id, 2008 struct rte_mbuf **pkts, uint32_t count, 2009 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2010 { 2011 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 2012 uint32_t remained = count; 2013 uint16_t async_descs_idx = 0; 2014 uint16_t num_buffers; 2015 uint16_t num_descs; 2016 int32_t n_xfer; 2017 2018 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 2019 struct iovec *vec_pool = vq->vec_pool; 2020 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 2021 struct iovec *src_iovec = vec_pool; 2022 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 2023 uint16_t slot_idx = 0; 2024 uint16_t segs_await = 0; 2025 uint16_t iovec_idx = 0, it_idx = 0; 2026 struct async_inflight_info *pkts_info = vq->async_pkts_info; 2027 uint32_t n_pkts = 0, pkt_err = 0; 2028 uint32_t num_async_pkts = 0, num_done_pkts = 0; 2029 struct vring_packed_desc async_descs[vq->size]; 2030 2031 do { 2032 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 2033 if (remained >= PACKED_BATCH_SIZE) { 2034 if (!virtio_dev_rx_async_batch_packed(dev, vq, 2035 &pkts[pkt_idx], comp_pkts, &num_done_pkts)) { 2036 pkt_idx += PACKED_BATCH_SIZE; 2037 remained -= PACKED_BATCH_SIZE; 2038 continue; 2039 } 2040 } 2041 2042 num_buffers = 0; 2043 num_descs = 0; 2044 if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx], 2045 &num_descs, &num_buffers, 2046 &async_descs[async_descs_idx], 2047 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 2048 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0)) 2049 break; 2050 2051 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 2052 dev->vid, vq->last_avail_idx, 2053 vq->last_avail_idx + num_descs); 2054 2055 slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size; 2056 if (it_pool[it_idx].count) { 2057 uint16_t from; 2058 2059 async_descs_idx += num_descs; 2060 async_fill_desc(&tdes[pkt_burst_idx++], 2061 &it_pool[it_idx], &it_pool[it_idx + 1]); 2062 pkts_info[slot_idx].descs = num_descs; 2063 pkts_info[slot_idx].nr_buffers = num_buffers; 2064 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 2065 num_async_pkts++; 2066 iovec_idx += it_pool[it_idx].nr_segs; 2067 it_idx += 2; 2068 2069 segs_await += it_pool[it_idx].nr_segs; 2070 2071 /** 2072 * recover shadow used ring and keep DMA-occupied 2073 * descriptors. 
2074 */ 2075 from = vq->shadow_used_idx - num_buffers; 2076 store_dma_desc_info_packed(vq->shadow_used_packed, 2077 vq->async_buffers_packed, vq->size, from, 2078 vq->async_buffer_idx_packed, num_buffers); 2079 2080 vq->async_buffer_idx_packed += num_buffers; 2081 if (vq->async_buffer_idx_packed >= vq->size) 2082 vq->async_buffer_idx_packed -= vq->size; 2083 vq->shadow_used_idx -= num_buffers; 2084 } else { 2085 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 2086 } 2087 2088 pkt_idx++; 2089 remained--; 2090 vq_inc_last_avail_packed(vq, num_descs); 2091 2092 /* 2093 * conditions to trigger async device transfer: 2094 * - buffered packet number reaches transfer threshold 2095 * - unused async iov number is less than max vhost vector 2096 */ 2097 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 2098 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) { 2099 n_xfer = vq->async_ops.transfer_data(dev->vid, 2100 queue_id, tdes, 0, pkt_burst_idx); 2101 if (n_xfer >= 0) { 2102 n_pkts = n_xfer; 2103 } else { 2104 VHOST_LOG_DATA(ERR, 2105 "(%d) %s: failed to transfer data for queue id %d.\n", 2106 dev->vid, __func__, queue_id); 2107 n_pkts = 0; 2108 } 2109 2110 iovec_idx = 0; 2111 it_idx = 0; 2112 segs_await = 0; 2113 vq->async_pkts_inflight_n += n_pkts; 2114 2115 if (unlikely(n_pkts < pkt_burst_idx)) { 2116 /* 2117 * log error packets number here and do actual 2118 * error processing when applications poll 2119 * completion 2120 */ 2121 pkt_err = pkt_burst_idx - n_pkts; 2122 pkt_burst_idx = 0; 2123 break; 2124 } 2125 2126 pkt_burst_idx = 0; 2127 } 2128 } while (pkt_idx < count); 2129 2130 if (pkt_burst_idx) { 2131 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 2132 if (n_xfer >= 0) { 2133 n_pkts = n_xfer; 2134 } else { 2135 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n", 2136 dev->vid, __func__, queue_id); 2137 n_pkts = 0; 2138 } 2139 2140 vq->async_pkts_inflight_n += n_pkts; 2141 2142 if (unlikely(n_pkts < pkt_burst_idx)) 2143 pkt_err = pkt_burst_idx - n_pkts; 2144 } 2145 2146 do_data_copy_enqueue(dev, vq); 2147 2148 if (unlikely(pkt_err)) 2149 dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err, 2150 &pkt_idx, &num_async_pkts, &num_done_pkts); 2151 vq->async_pkts_idx += num_async_pkts; 2152 if (vq->async_pkts_idx >= vq->size) 2153 vq->async_pkts_idx -= vq->size; 2154 *comp_count = num_done_pkts; 2155 2156 if (likely(vq->shadow_used_idx)) { 2157 vhost_flush_enqueue_shadow_packed(dev, vq); 2158 vhost_vring_call_packed(dev, vq); 2159 } 2160 2161 return pkt_idx; 2162 } 2163 2164 static __rte_always_inline void 2165 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 2166 { 2167 uint16_t nr_left = n_descs; 2168 uint16_t nr_copy; 2169 uint16_t to, from; 2170 2171 do { 2172 from = vq->last_async_desc_idx_split & (vq->size - 1); 2173 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 2174 to = vq->last_used_idx & (vq->size - 1); 2175 2176 if (to + nr_copy <= vq->size) { 2177 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2178 nr_copy * sizeof(struct vring_used_elem)); 2179 } else { 2180 uint16_t size = vq->size - to; 2181 2182 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2183 size * sizeof(struct vring_used_elem)); 2184 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size], 2185 (nr_copy - size) * sizeof(struct vring_used_elem)); 2186 } 2187 2188 vq->last_async_desc_idx_split += nr_copy; 2189 vq->last_used_idx += nr_copy; 2190 nr_left -= nr_copy; 2191 } while (nr_left > 0); 2192 } 2193 2194 static __rte_always_inline void 2195 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 2196 uint16_t n_buffers) 2197 { 2198 uint16_t nr_left = n_buffers; 2199 uint16_t from, to; 2200 2201 do { 2202 from = vq->last_async_buffer_idx_packed; 2203 to = (from + nr_left) % vq->size; 2204 if (to > from) { 2205 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from); 2206 vq->last_async_buffer_idx_packed += nr_left; 2207 nr_left = 0; 2208 } else { 2209 vhost_update_used_packed(vq, vq->async_buffers_packed + from, 2210 vq->size - from); 2211 vq->last_async_buffer_idx_packed = 0; 2212 nr_left -= vq->size - from; 2213 } 2214 } while (nr_left > 0); 2215 } 2216 2217 static __rte_always_inline uint16_t 2218 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, 2219 struct rte_mbuf **pkts, uint16_t count) 2220 { 2221 struct vhost_virtqueue *vq; 2222 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0; 2223 uint16_t start_idx, pkts_idx, vq_size; 2224 struct async_inflight_info *pkts_info; 2225 uint16_t from, i; 2226 int32_t n_cpl; 2227 2228 vq = dev->virtqueue[queue_id]; 2229 2230 pkts_idx = vq->async_pkts_idx % vq->size; 2231 pkts_info = vq->async_pkts_info; 2232 vq_size = vq->size; 2233 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx, 2234 vq_size, vq->async_pkts_inflight_n); 2235 2236 if (count > vq->async_last_pkts_n) { 2237 n_cpl = vq->async_ops.check_completed_copies(dev->vid, 2238 queue_id, 0, count - vq->async_last_pkts_n); 2239 if (n_cpl >= 0) { 2240 n_pkts_cpl = n_cpl; 2241 } else { 2242 VHOST_LOG_DATA(ERR, 2243 "(%d) %s: failed to check completed copies for queue id %d.\n", 2244 dev->vid, __func__, queue_id); 2245 n_pkts_cpl = 0; 2246 } 2247 } 2248 n_pkts_cpl += vq->async_last_pkts_n; 2249 2250 n_pkts_put = RTE_MIN(count, n_pkts_cpl); 2251 if (unlikely(n_pkts_put == 0)) { 2252 vq->async_last_pkts_n = n_pkts_cpl; 2253 return 0; 2254 } 2255 2256 if (vq_is_packed(dev)) { 2257 for (i = 0; i < n_pkts_put; i++) { 2258 from = (start_idx + i) % vq_size; 2259 n_buffers += pkts_info[from].nr_buffers; 2260 pkts[i] = pkts_info[from].mbuf; 2261 } 2262 } else { 2263 for (i = 0; i < n_pkts_put; i++) { 2264 from = (start_idx + i) & (vq_size - 1); 2265 n_descs += pkts_info[from].descs; 2266 pkts[i] = pkts_info[from].mbuf; 2267 } 2268 } 2269 2270 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put; 2271 vq->async_pkts_inflight_n -= n_pkts_put; 2272 2273 if (likely(vq->enabled && vq->access_ok)) { 2274 if (vq_is_packed(dev)) { 2275 write_back_completed_descs_packed(vq, n_buffers); 2276 2277 vhost_vring_call_packed(dev, vq); 2278 } else { 2279 write_back_completed_descs_split(vq, n_descs); 2280 2281 __atomic_add_fetch(&vq->used->idx, n_descs, 2282 __ATOMIC_RELEASE); 2283 vhost_vring_call_split(dev, vq); 2284 } 2285 } else { 2286 if (vq_is_packed(dev)) { 2287 
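/* The vring is disabled or not accessible: only advance the async
 * buffer index here, no completed buffers can be written back. */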
vq->last_async_buffer_idx_packed += n_buffers; 2288 if (vq->last_async_buffer_idx_packed >= vq->size) 2289 vq->last_async_buffer_idx_packed -= vq->size; 2290 } else { 2291 vq->last_async_desc_idx_split += n_descs; 2292 } 2293 } 2294 2295 return n_pkts_put; 2296 } 2297 2298 uint16_t 2299 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2300 struct rte_mbuf **pkts, uint16_t count) 2301 { 2302 struct virtio_net *dev = get_device(vid); 2303 struct vhost_virtqueue *vq; 2304 uint16_t n_pkts_cpl = 0; 2305 2306 if (!dev) 2307 return 0; 2308 2309 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2310 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2311 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2312 dev->vid, __func__, queue_id); 2313 return 0; 2314 } 2315 2316 vq = dev->virtqueue[queue_id]; 2317 2318 if (unlikely(!vq->async_registered)) { 2319 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2320 dev->vid, __func__, queue_id); 2321 return 0; 2322 } 2323 2324 rte_spinlock_lock(&vq->access_lock); 2325 2326 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count); 2327 2328 rte_spinlock_unlock(&vq->access_lock); 2329 2330 return n_pkts_cpl; 2331 } 2332 2333 uint16_t 2334 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2335 struct rte_mbuf **pkts, uint16_t count) 2336 { 2337 struct virtio_net *dev = get_device(vid); 2338 struct vhost_virtqueue *vq; 2339 uint16_t n_pkts_cpl = 0; 2340 2341 if (!dev) 2342 return 0; 2343 2344 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2345 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2346 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2347 dev->vid, __func__, queue_id); 2348 return 0; 2349 } 2350 2351 vq = dev->virtqueue[queue_id]; 2352 2353 if (unlikely(!vq->async_registered)) { 2354 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2355 dev->vid, __func__, queue_id); 2356 return 0; 2357 } 2358 2359 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count); 2360 2361 return n_pkts_cpl; 2362 } 2363 2364 static __rte_always_inline uint32_t 2365 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2366 struct rte_mbuf **pkts, uint32_t count, 2367 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2368 { 2369 struct vhost_virtqueue *vq; 2370 uint32_t nb_tx = 0; 2371 2372 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2373 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2374 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2375 dev->vid, __func__, queue_id); 2376 return 0; 2377 } 2378 2379 vq = dev->virtqueue[queue_id]; 2380 2381 rte_spinlock_lock(&vq->access_lock); 2382 2383 if (unlikely(!vq->enabled || !vq->async_registered)) 2384 goto out_access_unlock; 2385 2386 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2387 vhost_user_iotlb_rd_lock(vq); 2388 2389 if (unlikely(!vq->access_ok)) 2390 if (unlikely(vring_translate(dev, vq) < 0)) 2391 goto out; 2392 2393 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2394 if (count == 0) 2395 goto out; 2396 2397 if (vq_is_packed(dev)) 2398 nb_tx = virtio_dev_rx_async_submit_packed(dev, 2399 vq, queue_id, pkts, count, comp_pkts, 2400 comp_count); 2401 else 2402 nb_tx = virtio_dev_rx_async_submit_split(dev, 2403 vq, queue_id, pkts, count, comp_pkts, 2404 comp_count); 2405 2406 out: 2407 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2408 vhost_user_iotlb_rd_unlock(vq); 2409 
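/* Locks are released in reverse order of acquisition: the IOTLB read
 * lock above, then the virtqueue access lock below. */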
2410 out_access_unlock: 2411 rte_spinlock_unlock(&vq->access_lock); 2412 2413 return nb_tx; 2414 } 2415 2416 uint16_t 2417 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2418 struct rte_mbuf **pkts, uint16_t count, 2419 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2420 { 2421 struct virtio_net *dev = get_device(vid); 2422 2423 *comp_count = 0; 2424 if (!dev) 2425 return 0; 2426 2427 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2428 VHOST_LOG_DATA(ERR, 2429 "(%d) %s: built-in vhost net backend is disabled.\n", 2430 dev->vid, __func__); 2431 return 0; 2432 } 2433 2434 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts, 2435 comp_count); 2436 } 2437 2438 static inline bool 2439 virtio_net_with_host_offload(struct virtio_net *dev) 2440 { 2441 if (dev->features & 2442 ((1ULL << VIRTIO_NET_F_CSUM) | 2443 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2444 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2445 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2446 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2447 return true; 2448 2449 return false; 2450 } 2451 2452 static int 2453 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2454 { 2455 struct rte_ipv4_hdr *ipv4_hdr; 2456 struct rte_ipv6_hdr *ipv6_hdr; 2457 struct rte_ether_hdr *eth_hdr; 2458 uint16_t ethertype; 2459 uint16_t data_len = rte_pktmbuf_data_len(m); 2460 2461 if (data_len < sizeof(struct rte_ether_hdr)) 2462 return -EINVAL; 2463 2464 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2465 2466 m->l2_len = sizeof(struct rte_ether_hdr); 2467 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2468 2469 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2470 if (data_len < sizeof(struct rte_ether_hdr) + 2471 sizeof(struct rte_vlan_hdr)) 2472 goto error; 2473 2474 struct rte_vlan_hdr *vlan_hdr = 2475 (struct rte_vlan_hdr *)(eth_hdr + 1); 2476 2477 m->l2_len += sizeof(struct rte_vlan_hdr); 2478 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2479 } 2480 2481 switch (ethertype) { 2482 case RTE_ETHER_TYPE_IPV4: 2483 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2484 goto error; 2485 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2486 m->l2_len); 2487 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2488 if (data_len < m->l2_len + m->l3_len) 2489 goto error; 2490 m->ol_flags |= PKT_TX_IPV4; 2491 *l4_proto = ipv4_hdr->next_proto_id; 2492 break; 2493 case RTE_ETHER_TYPE_IPV6: 2494 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2495 goto error; 2496 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2497 m->l2_len); 2498 m->l3_len = sizeof(struct rte_ipv6_hdr); 2499 m->ol_flags |= PKT_TX_IPV6; 2500 *l4_proto = ipv6_hdr->proto; 2501 break; 2502 default: 2503 /* a valid L3 header is needed for further L4 parsing */ 2504 goto error; 2505 } 2506 2507 /* both CSUM and GSO need a valid L4 header */ 2508 switch (*l4_proto) { 2509 case IPPROTO_TCP: 2510 if (data_len < m->l2_len + m->l3_len + 2511 sizeof(struct rte_tcp_hdr)) 2512 goto error; 2513 break; 2514 case IPPROTO_UDP: 2515 if (data_len < m->l2_len + m->l3_len + 2516 sizeof(struct rte_udp_hdr)) 2517 goto error; 2518 break; 2519 case IPPROTO_SCTP: 2520 if (data_len < m->l2_len + m->l3_len + 2521 sizeof(struct rte_sctp_hdr)) 2522 goto error; 2523 break; 2524 default: 2525 goto error; 2526 } 2527 2528 return 0; 2529 2530 error: 2531 m->l2_len = 0; 2532 m->l3_len = 0; 2533 m->ol_flags = 0; 2534 return -EINVAL; 2535 } 2536 2537 static __rte_always_inline void 2538 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m) 2539 { 2540 uint8_t 
l4_proto = 0; 2541 struct rte_tcp_hdr *tcp_hdr = NULL; 2542 uint16_t tcp_len; 2543 uint16_t data_len = rte_pktmbuf_data_len(m); 2544 2545 if (parse_headers(m, &l4_proto) < 0) 2546 return; 2547 2548 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2549 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2550 switch (hdr->csum_offset) { 2551 case (offsetof(struct rte_tcp_hdr, cksum)): 2552 if (l4_proto != IPPROTO_TCP) 2553 goto error; 2554 m->ol_flags |= PKT_TX_TCP_CKSUM; 2555 break; 2556 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2557 if (l4_proto != IPPROTO_UDP) 2558 goto error; 2559 m->ol_flags |= PKT_TX_UDP_CKSUM; 2560 break; 2561 case (offsetof(struct rte_sctp_hdr, cksum)): 2562 if (l4_proto != IPPROTO_SCTP) 2563 goto error; 2564 m->ol_flags |= PKT_TX_SCTP_CKSUM; 2565 break; 2566 default: 2567 goto error; 2568 } 2569 } else { 2570 goto error; 2571 } 2572 } 2573 2574 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2575 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2576 case VIRTIO_NET_HDR_GSO_TCPV4: 2577 case VIRTIO_NET_HDR_GSO_TCPV6: 2578 if (l4_proto != IPPROTO_TCP) 2579 goto error; 2580 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2581 struct rte_tcp_hdr *, 2582 m->l2_len + m->l3_len); 2583 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2584 if (data_len < m->l2_len + m->l3_len + tcp_len) 2585 goto error; 2586 m->ol_flags |= PKT_TX_TCP_SEG; 2587 m->tso_segsz = hdr->gso_size; 2588 m->l4_len = tcp_len; 2589 break; 2590 case VIRTIO_NET_HDR_GSO_UDP: 2591 if (l4_proto != IPPROTO_UDP) 2592 goto error; 2593 m->ol_flags |= PKT_TX_UDP_SEG; 2594 m->tso_segsz = hdr->gso_size; 2595 m->l4_len = sizeof(struct rte_udp_hdr); 2596 break; 2597 default: 2598 VHOST_LOG_DATA(WARNING, 2599 "unsupported gso type %u.\n", hdr->gso_type); 2600 goto error; 2601 } 2602 } 2603 return; 2604 2605 error: 2606 m->l2_len = 0; 2607 m->l3_len = 0; 2608 m->ol_flags = 0; 2609 } 2610 2611 static __rte_always_inline void 2612 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m, 2613 bool legacy_ol_flags) 2614 { 2615 struct rte_net_hdr_lens hdr_lens; 2616 int l4_supported = 0; 2617 uint32_t ptype; 2618 2619 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2620 return; 2621 2622 if (legacy_ol_flags) { 2623 vhost_dequeue_offload_legacy(hdr, m); 2624 return; 2625 } 2626 2627 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 2628 2629 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2630 m->packet_type = ptype; 2631 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2632 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2633 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2634 l4_supported = 1; 2635 2636 /* According to Virtio 1.1 spec, the device only needs to look at 2637 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2638 * This differs from the processing incoming packets path where the 2639 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2640 * device. 2641 * 2642 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2643 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2644 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2645 * 2646 * 5.1.6.2.2 Device Requirements: Packet Transmission 2647 * The device MUST ignore flag bits that it does not recognize. 
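 *
 * Hence only VIRTIO_NET_HDR_F_NEEDS_CSUM and the GSO fields are acted
 * upon below; VIRTIO_NET_HDR_F_DATA_VALID is not expected from the
 * driver on this path and is ignored.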
2648 */ 2649 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2650 uint32_t hdrlen; 2651 2652 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2653 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2654 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 2655 } else { 2656 /* Unknown proto or tunnel, do sw cksum. We can assume 2657 * the cksum field is in the first segment since the 2658 * buffers we provided to the host are large enough. 2659 * In case of SCTP, this will be wrong since it's a CRC 2660 * but there's nothing we can do. 2661 */ 2662 uint16_t csum = 0, off; 2663 2664 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2665 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2666 return; 2667 if (likely(csum != 0xffff)) 2668 csum = ~csum; 2669 off = hdr->csum_offset + hdr->csum_start; 2670 if (rte_pktmbuf_data_len(m) >= off + 1) 2671 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2672 } 2673 } 2674 2675 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2676 if (hdr->gso_size == 0) 2677 return; 2678 2679 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2680 case VIRTIO_NET_HDR_GSO_TCPV4: 2681 case VIRTIO_NET_HDR_GSO_TCPV6: 2682 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2683 break; 2684 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2685 m->tso_segsz = hdr->gso_size; 2686 break; 2687 case VIRTIO_NET_HDR_GSO_UDP: 2688 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2689 break; 2690 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2691 m->tso_segsz = hdr->gso_size; 2692 break; 2693 default: 2694 break; 2695 } 2696 } 2697 } 2698 2699 static __rte_noinline void 2700 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2701 struct buf_vector *buf_vec) 2702 { 2703 uint64_t len; 2704 uint64_t remain = sizeof(struct virtio_net_hdr); 2705 uint64_t src; 2706 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2707 2708 while (remain) { 2709 len = RTE_MIN(remain, buf_vec->buf_len); 2710 src = buf_vec->buf_addr; 2711 rte_memcpy((void *)(uintptr_t)dst, 2712 (void *)(uintptr_t)src, len); 2713 2714 remain -= len; 2715 dst += len; 2716 buf_vec++; 2717 } 2718 } 2719 2720 static __rte_always_inline int 2721 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2722 struct buf_vector *buf_vec, uint16_t nr_vec, 2723 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2724 bool legacy_ol_flags) 2725 { 2726 uint32_t buf_avail, buf_offset; 2727 uint64_t buf_addr, buf_len; 2728 uint32_t mbuf_avail, mbuf_offset; 2729 uint32_t cpy_len; 2730 struct rte_mbuf *cur = m, *prev = m; 2731 struct virtio_net_hdr tmp_hdr; 2732 struct virtio_net_hdr *hdr = NULL; 2733 /* A counter to avoid desc dead loop chain */ 2734 uint16_t vec_idx = 0; 2735 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 2736 int error = 0; 2737 2738 buf_addr = buf_vec[vec_idx].buf_addr; 2739 buf_len = buf_vec[vec_idx].buf_len; 2740 2741 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 2742 error = -1; 2743 goto out; 2744 } 2745 2746 if (virtio_net_with_host_offload(dev)) { 2747 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2748 /* 2749 * No luck, the virtio-net header doesn't fit 2750 * in a contiguous virtual area. 2751 */ 2752 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2753 hdr = &tmp_hdr; 2754 } else { 2755 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); 2756 } 2757 } 2758 2759 /* 2760 * A virtio driver normally uses at least 2 desc buffers 2761 * for Tx: the first for storing the header, and others 2762 * for storing the data. 
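 * However, the header may also share the first buffer with data, or
 * even be split across buffers, so all three layouts are handled just
 * below.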
2763 */ 2764 if (unlikely(buf_len < dev->vhost_hlen)) { 2765 buf_offset = dev->vhost_hlen - buf_len; 2766 vec_idx++; 2767 buf_addr = buf_vec[vec_idx].buf_addr; 2768 buf_len = buf_vec[vec_idx].buf_len; 2769 buf_avail = buf_len - buf_offset; 2770 } else if (buf_len == dev->vhost_hlen) { 2771 if (unlikely(++vec_idx >= nr_vec)) 2772 goto out; 2773 buf_addr = buf_vec[vec_idx].buf_addr; 2774 buf_len = buf_vec[vec_idx].buf_len; 2775 2776 buf_offset = 0; 2777 buf_avail = buf_len; 2778 } else { 2779 buf_offset = dev->vhost_hlen; 2780 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; 2781 } 2782 2783 PRINT_PACKET(dev, 2784 (uintptr_t)(buf_addr + buf_offset), 2785 (uint32_t)buf_avail, 0); 2786 2787 mbuf_offset = 0; 2788 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2789 while (1) { 2790 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2791 2792 if (likely(cpy_len > MAX_BATCH_LEN || 2793 vq->batch_copy_nb_elems >= vq->size || 2794 (hdr && cur == m))) { 2795 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 2796 mbuf_offset), 2797 (void *)((uintptr_t)(buf_addr + 2798 buf_offset)), cpy_len); 2799 } else { 2800 batch_copy[vq->batch_copy_nb_elems].dst = 2801 rte_pktmbuf_mtod_offset(cur, void *, 2802 mbuf_offset); 2803 batch_copy[vq->batch_copy_nb_elems].src = 2804 (void *)((uintptr_t)(buf_addr + buf_offset)); 2805 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 2806 vq->batch_copy_nb_elems++; 2807 } 2808 2809 mbuf_avail -= cpy_len; 2810 mbuf_offset += cpy_len; 2811 buf_avail -= cpy_len; 2812 buf_offset += cpy_len; 2813 2814 /* This buf reaches to its end, get the next one */ 2815 if (buf_avail == 0) { 2816 if (++vec_idx >= nr_vec) 2817 break; 2818 2819 buf_addr = buf_vec[vec_idx].buf_addr; 2820 buf_len = buf_vec[vec_idx].buf_len; 2821 2822 buf_offset = 0; 2823 buf_avail = buf_len; 2824 2825 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2826 (uint32_t)buf_avail, 0); 2827 } 2828 2829 /* 2830 * This mbuf reaches to its end, get a new one 2831 * to hold more data. 
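 * The fresh mbuf is linked after the previous segment, and the head
 * mbuf's segment count and packet length are updated accordingly.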
2832 */ 2833 if (mbuf_avail == 0) { 2834 cur = rte_pktmbuf_alloc(mbuf_pool); 2835 if (unlikely(cur == NULL)) { 2836 VHOST_LOG_DATA(ERR, "Failed to " 2837 "allocate memory for mbuf.\n"); 2838 error = -1; 2839 goto out; 2840 } 2841 2842 prev->next = cur; 2843 prev->data_len = mbuf_offset; 2844 m->nb_segs += 1; 2845 m->pkt_len += mbuf_offset; 2846 prev = cur; 2847 2848 mbuf_offset = 0; 2849 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2850 } 2851 } 2852 2853 prev->data_len = mbuf_offset; 2854 m->pkt_len += mbuf_offset; 2855 2856 if (hdr) 2857 vhost_dequeue_offload(hdr, m, legacy_ol_flags); 2858 2859 out: 2860 2861 return error; 2862 } 2863 2864 static void 2865 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2866 { 2867 rte_free(opaque); 2868 } 2869 2870 static int 2871 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) 2872 { 2873 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2874 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2875 uint16_t buf_len; 2876 rte_iova_t iova; 2877 void *buf; 2878 2879 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2880 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2881 2882 if (unlikely(total_len > UINT16_MAX)) 2883 return -ENOSPC; 2884 2885 buf_len = total_len; 2886 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2887 if (unlikely(buf == NULL)) 2888 return -ENOMEM; 2889 2890 /* Initialize shinfo */ 2891 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2892 virtio_dev_extbuf_free, buf); 2893 if (unlikely(shinfo == NULL)) { 2894 rte_free(buf); 2895 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); 2896 return -1; 2897 } 2898 2899 iova = rte_malloc_virt2iova(buf); 2900 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2901 rte_pktmbuf_reset_headroom(pkt); 2902 2903 return 0; 2904 } 2905 2906 /* 2907 * Prepare a host supported pktmbuf. 2908 */ 2909 static __rte_always_inline int 2910 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2911 uint32_t data_len) 2912 { 2913 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2914 return 0; 2915 2916 /* attach an external buffer if supported */ 2917 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len)) 2918 return 0; 2919 2920 /* check if chained buffers are allowed */ 2921 if (!dev->linearbuf) 2922 return 0; 2923 2924 return -1; 2925 } 2926 2927 __rte_always_inline 2928 static uint16_t 2929 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2930 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2931 bool legacy_ol_flags) 2932 { 2933 uint16_t i; 2934 uint16_t free_entries; 2935 uint16_t dropped = 0; 2936 static bool allocerr_warned; 2937 2938 /* 2939 * The ordering between avail index and 2940 * desc reads needs to be enforced. 
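 * Here this means the avail index is loaded with acquire semantics
 * before any descriptor it exposes is read.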
2941 */ 2942 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2943 vq->last_avail_idx; 2944 if (free_entries == 0) 2945 return 0; 2946 2947 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2948 2949 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2950 2951 count = RTE_MIN(count, MAX_PKT_BURST); 2952 count = RTE_MIN(count, free_entries); 2953 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", 2954 dev->vid, count); 2955 2956 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2957 return 0; 2958 2959 for (i = 0; i < count; i++) { 2960 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2961 uint16_t head_idx; 2962 uint32_t buf_len; 2963 uint16_t nr_vec = 0; 2964 int err; 2965 2966 if (unlikely(fill_vec_buf_split(dev, vq, 2967 vq->last_avail_idx + i, 2968 &nr_vec, buf_vec, 2969 &head_idx, &buf_len, 2970 VHOST_ACCESS_RO) < 0)) 2971 break; 2972 2973 update_shadow_used_ring_split(vq, head_idx, 0); 2974 2975 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2976 if (unlikely(err)) { 2977 /* 2978 * mbuf allocation fails for jumbo packets when external 2979 * buffer allocation is not allowed and linear buffer 2980 * is required. Drop this packet. 2981 */ 2982 if (!allocerr_warned) { 2983 VHOST_LOG_DATA(ERR, 2984 "Failed mbuf alloc of size %d from %s on %s.\n", 2985 buf_len, mbuf_pool->name, dev->ifname); 2986 allocerr_warned = true; 2987 } 2988 dropped += 1; 2989 i++; 2990 break; 2991 } 2992 2993 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2994 mbuf_pool, legacy_ol_flags); 2995 if (unlikely(err)) { 2996 if (!allocerr_warned) { 2997 VHOST_LOG_DATA(ERR, 2998 "Failed to copy desc to mbuf on %s.\n", 2999 dev->ifname); 3000 allocerr_warned = true; 3001 } 3002 dropped += 1; 3003 i++; 3004 break; 3005 } 3006 } 3007 3008 if (dropped) 3009 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 3010 3011 vq->last_avail_idx += i; 3012 3013 do_data_copy_dequeue(vq); 3014 if (unlikely(i < count)) 3015 vq->shadow_used_idx = i; 3016 if (likely(vq->shadow_used_idx)) { 3017 flush_shadow_used_ring_split(dev, vq); 3018 vhost_vring_call_split(dev, vq); 3019 } 3020 3021 return (i - dropped); 3022 } 3023 3024 __rte_noinline 3025 static uint16_t 3026 virtio_dev_tx_split_legacy(struct virtio_net *dev, 3027 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3028 struct rte_mbuf **pkts, uint16_t count) 3029 { 3030 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 3031 } 3032 3033 __rte_noinline 3034 static uint16_t 3035 virtio_dev_tx_split_compliant(struct virtio_net *dev, 3036 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 3037 struct rte_mbuf **pkts, uint16_t count) 3038 { 3039 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 3040 } 3041 3042 static __rte_always_inline int 3043 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 3044 struct vhost_virtqueue *vq, 3045 struct rte_mbuf **pkts, 3046 uint16_t avail_idx, 3047 uintptr_t *desc_addrs, 3048 uint16_t *ids) 3049 { 3050 bool wrap = vq->avail_wrap_counter; 3051 struct vring_packed_desc *descs = vq->desc_packed; 3052 uint64_t lens[PACKED_BATCH_SIZE]; 3053 uint64_t buf_lens[PACKED_BATCH_SIZE]; 3054 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3055 uint16_t flags, i; 3056 3057 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 3058 return -1; 3059 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 3060 return -1; 3061 3062 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3063 flags = descs[avail_idx + i].flags; 3064 if 
(unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 3065 (wrap == !!(flags & VRING_DESC_F_USED)) || 3066 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 3067 return -1; 3068 } 3069 3070 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 3071 3072 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3073 lens[i] = descs[avail_idx + i].len; 3074 3075 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3076 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 3077 descs[avail_idx + i].addr, 3078 &lens[i], VHOST_ACCESS_RW); 3079 } 3080 3081 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3082 if (unlikely(!desc_addrs[i])) 3083 return -1; 3084 if (unlikely((lens[i] != descs[avail_idx + i].len))) 3085 return -1; 3086 } 3087 3088 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3089 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 3090 goto err; 3091 } 3092 3093 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3094 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 3095 3096 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3097 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 3098 goto err; 3099 } 3100 3101 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3102 pkts[i]->pkt_len = lens[i] - buf_offset; 3103 pkts[i]->data_len = pkts[i]->pkt_len; 3104 ids[i] = descs[avail_idx + i].id; 3105 } 3106 3107 return 0; 3108 3109 err: 3110 return -1; 3111 } 3112 3113 static __rte_always_inline int 3114 virtio_dev_tx_batch_packed(struct virtio_net *dev, 3115 struct vhost_virtqueue *vq, 3116 struct rte_mbuf **pkts, 3117 bool legacy_ol_flags) 3118 { 3119 uint16_t avail_idx = vq->last_avail_idx; 3120 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3121 struct virtio_net_hdr *hdr; 3122 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 3123 uint16_t ids[PACKED_BATCH_SIZE]; 3124 uint16_t i; 3125 3126 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 3127 desc_addrs, ids)) 3128 return -1; 3129 3130 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3131 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 3132 3133 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 3134 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 3135 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 3136 pkts[i]->pkt_len); 3137 3138 if (virtio_net_with_host_offload(dev)) { 3139 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 3140 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 3141 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags); 3142 } 3143 } 3144 3145 if (virtio_net_is_inorder(dev)) 3146 vhost_shadow_dequeue_batch_packed_inorder(vq, 3147 ids[PACKED_BATCH_SIZE - 1]); 3148 else 3149 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 3150 3151 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 3152 3153 return 0; 3154 } 3155 3156 static __rte_always_inline int 3157 vhost_dequeue_single_packed(struct virtio_net *dev, 3158 struct vhost_virtqueue *vq, 3159 struct rte_mempool *mbuf_pool, 3160 struct rte_mbuf *pkts, 3161 uint16_t *buf_id, 3162 uint16_t *desc_count, 3163 bool legacy_ol_flags) 3164 { 3165 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 3166 uint32_t buf_len; 3167 uint16_t nr_vec = 0; 3168 int err; 3169 static bool allocerr_warned; 3170 3171 if (unlikely(fill_vec_buf_packed(dev, vq, 3172 vq->last_avail_idx, desc_count, 3173 buf_vec, &nr_vec, 3174 buf_id, &buf_len, 3175 VHOST_ACCESS_RO) < 0)) 3176 return -1; 3177 3178 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 3179 if (!allocerr_warned) { 3180 VHOST_LOG_DATA(ERR, 3181 "Failed mbuf alloc of size %d from %s on %s.\n", 3182 buf_len, 
mbuf_pool->name, dev->ifname); 3183 allocerr_warned = true; 3184 } 3185 return -1; 3186 } 3187 3188 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 3189 mbuf_pool, legacy_ol_flags); 3190 if (unlikely(err)) { 3191 if (!allocerr_warned) { 3192 VHOST_LOG_DATA(ERR, 3193 "Failed to copy desc to mbuf on %s.\n", 3194 dev->ifname); 3195 allocerr_warned = true; 3196 } 3197 return -1; 3198 } 3199 3200 return 0; 3201 } 3202 3203 static __rte_always_inline int 3204 virtio_dev_tx_single_packed(struct virtio_net *dev, 3205 struct vhost_virtqueue *vq, 3206 struct rte_mempool *mbuf_pool, 3207 struct rte_mbuf *pkts, 3208 bool legacy_ol_flags) 3209 { 3210 3211 uint16_t buf_id, desc_count = 0; 3212 int ret; 3213 3214 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 3215 &desc_count, legacy_ol_flags); 3216 3217 if (likely(desc_count > 0)) { 3218 if (virtio_net_is_inorder(dev)) 3219 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 3220 desc_count); 3221 else 3222 vhost_shadow_dequeue_single_packed(vq, buf_id, 3223 desc_count); 3224 3225 vq_inc_last_avail_packed(vq, desc_count); 3226 } 3227 3228 return ret; 3229 } 3230 3231 __rte_always_inline 3232 static uint16_t 3233 virtio_dev_tx_packed(struct virtio_net *dev, 3234 struct vhost_virtqueue *__rte_restrict vq, 3235 struct rte_mempool *mbuf_pool, 3236 struct rte_mbuf **__rte_restrict pkts, 3237 uint32_t count, 3238 bool legacy_ol_flags) 3239 { 3240 uint32_t pkt_idx = 0; 3241 3242 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3243 return 0; 3244 3245 do { 3246 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3247 3248 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3249 if (!virtio_dev_tx_batch_packed(dev, vq, 3250 &pkts[pkt_idx], 3251 legacy_ol_flags)) { 3252 pkt_idx += PACKED_BATCH_SIZE; 3253 continue; 3254 } 3255 } 3256 3257 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3258 pkts[pkt_idx], 3259 legacy_ol_flags)) 3260 break; 3261 pkt_idx++; 3262 } while (pkt_idx < count); 3263 3264 if (pkt_idx != count) 3265 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3266 3267 if (vq->shadow_used_idx) { 3268 do_data_copy_dequeue(vq); 3269 3270 vhost_flush_dequeue_shadow_packed(dev, vq); 3271 vhost_vring_call_packed(dev, vq); 3272 } 3273 3274 return pkt_idx; 3275 } 3276 3277 __rte_noinline 3278 static uint16_t 3279 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3280 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3281 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3282 { 3283 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3284 } 3285 3286 __rte_noinline 3287 static uint16_t 3288 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3289 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3290 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3291 { 3292 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3293 } 3294 3295 uint16_t 3296 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3297 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3298 { 3299 struct virtio_net *dev; 3300 struct rte_mbuf *rarp_mbuf = NULL; 3301 struct vhost_virtqueue *vq; 3302 int16_t success = 1; 3303 3304 dev = get_device(vid); 3305 if (!dev) 3306 return 0; 3307 3308 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3309 VHOST_LOG_DATA(ERR, 3310 "(%d) %s: built-in vhost net backend is disabled.\n", 3311 dev->vid, __func__); 3312 return 0; 3313 } 3314 3315 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, 
dev->nr_vring))) { 3316 VHOST_LOG_DATA(ERR, 3317 "(%d) %s: invalid virtqueue idx %d.\n", 3318 dev->vid, __func__, queue_id); 3319 return 0; 3320 } 3321 3322 vq = dev->virtqueue[queue_id]; 3323 3324 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) 3325 return 0; 3326 3327 if (unlikely(!vq->enabled)) { 3328 count = 0; 3329 goto out_access_unlock; 3330 } 3331 3332 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3333 vhost_user_iotlb_rd_lock(vq); 3334 3335 if (unlikely(!vq->access_ok)) 3336 if (unlikely(vring_translate(dev, vq) < 0)) { 3337 count = 0; 3338 goto out; 3339 } 3340
3341 /* 3342 * Construct a RARP broadcast packet, and inject it into the "pkts" 3343 * array, to make it look like the guest actually sent such a packet. 3344 * 3345 * Check vhost_user_send_rarp() for more information. 3346 * 3347 * broadcast_rarp shares a cacheline in the virtio_net structure 3348 * with some fields that are accessed during enqueue, and 3349 * __atomic_compare_exchange_n causes a write when it performs the 3350 * compare and exchange. This could result in false sharing between 3351 * enqueue and dequeue. 3352 * 3353 * Prevent unnecessary false sharing by reading broadcast_rarp first 3354 * and only performing the compare and exchange if the read indicates 3355 * it is likely to be set. 3356 */ 3357 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && 3358 __atomic_compare_exchange_n(&dev->broadcast_rarp, 3359 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { 3360 3361 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); 3362 if (rarp_mbuf == NULL) { 3363 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n"); 3364 count = 0; 3365 goto out; 3366 } 3367 count -= 1; 3368 } 3369
3370 if (vq_is_packed(dev)) { 3371 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3372 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); 3373 else 3374 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); 3375 } else { 3376 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) 3377 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); 3378 else 3379 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); 3380 } 3381
3382 out: 3383 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 3384 vhost_user_iotlb_rd_unlock(vq); 3385 3386 out_access_unlock: 3387 rte_spinlock_unlock(&vq->access_lock); 3388 3389 if (unlikely(rarp_mbuf != NULL)) { 3390 /* 3391 * Inject it at the head of the "pkts" array, so that the switch's 3392 * MAC learning table gets updated first. 3393 */ 3394 memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *)); 3395 pkts[0] = rarp_mbuf; 3396 count += 1; 3397 } 3398 3399 return count; 3400 } 3401
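/*
 * Illustrative sketch, not part of the library: one way an application
 * might drive the async enqueue path implemented above. It assumes an
 * async channel has already been registered for this virtqueue, that
 * "vid", "queue_id" and the burst size of 32 are placeholders chosen by
 * the caller, and that mbufs handed to the async path stay owned by the
 * application until they are reported as completed.
 *
 *	struct rte_mbuf *pkts[32], *comp[32], *done[32];
 *	uint32_t n_comp = 0;
 *	uint16_t n_enq, n_done, i;
 *
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, 32,
 *						comp, &n_comp);
 *	for (i = 0; i < n_comp; i++)	// copied synchronously, already done
 *		rte_pktmbuf_free(comp[i]);
 *	// pkts[n_enq..31] were not enqueued and remain owned by the caller
 *
 *	n_done = rte_vhost_poll_enqueue_completed(vid, queue_id, done, 32);
 *	for (i = 0; i < n_done; i++)	// DMA copies finished for these
 *		rte_pktmbuf_free(done[i]);
 */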