1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_vhost.h> 15 #include <rte_tcp.h> 16 #include <rte_udp.h> 17 #include <rte_sctp.h> 18 #include <rte_arp.h> 19 #include <rte_spinlock.h> 20 #include <rte_malloc.h> 21 #include <rte_vhost_async.h> 22 23 #include "iotlb.h" 24 #include "vhost.h" 25 26 #define MAX_BATCH_LEN 256 27 28 #define VHOST_ASYNC_BATCH_THRESHOLD 32 29 30 static __rte_always_inline bool 31 rxvq_is_mergeable(struct virtio_net *dev) 32 { 33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 34 } 35 36 static __rte_always_inline bool 37 virtio_net_is_inorder(struct virtio_net *dev) 38 { 39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 40 } 41 42 static bool 43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 44 { 45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 46 } 47 48 static inline void 49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 50 { 51 struct batch_copy_elem *elem = vq->batch_copy_elems; 52 uint16_t count = vq->batch_copy_nb_elems; 53 int i; 54 55 for (i = 0; i < count; i++) { 56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 58 elem[i].len); 59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 60 } 61 62 vq->batch_copy_nb_elems = 0; 63 } 64 65 static inline void 66 do_data_copy_dequeue(struct vhost_virtqueue *vq) 67 { 68 struct batch_copy_elem *elem = vq->batch_copy_elems; 69 uint16_t count = vq->batch_copy_nb_elems; 70 int i; 71 72 for (i = 0; i < count; i++) 73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 74 75 vq->batch_copy_nb_elems = 0; 76 } 77 78 static __rte_always_inline void 79 do_flush_shadow_used_ring_split(struct virtio_net *dev, 80 struct vhost_virtqueue *vq, 81 uint16_t to, uint16_t from, uint16_t size) 82 { 83 rte_memcpy(&vq->used->ring[to], 84 &vq->shadow_used_split[from], 85 size * sizeof(struct vring_used_elem)); 86 vhost_log_cache_used_vring(dev, vq, 87 offsetof(struct vring_used, ring[to]), 88 size * sizeof(struct vring_used_elem)); 89 } 90 91 static __rte_always_inline void 92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 93 { 94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 95 96 if (used_idx + vq->shadow_used_idx <= vq->size) { 97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 98 vq->shadow_used_idx); 99 } else { 100 uint16_t size; 101 102 /* update used ring interval [used_idx, vq->size] */ 103 size = vq->size - used_idx; 104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 105 106 /* update the left half used ring interval [0, left_size] */ 107 do_flush_shadow_used_ring_split(dev, vq, 0, size, 108 vq->shadow_used_idx - size); 109 } 110 vq->last_used_idx += vq->shadow_used_idx; 111 112 vhost_log_cache_sync(dev, vq); 113 114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 115 __ATOMIC_RELEASE); 116 vq->shadow_used_idx = 0; 117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 118 sizeof(vq->used->idx)); 119 } 120 121 static __rte_always_inline void 122 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 123 uint16_t desc_idx, uint32_t len) 124 { 125 uint16_t i = vq->shadow_used_idx++; 126 127 vq->shadow_used_split[i].id = desc_idx; 128 
vq->shadow_used_split[i].len = len; 129 } 130 131 static __rte_always_inline void 132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 133 struct vhost_virtqueue *vq) 134 { 135 int i; 136 uint16_t used_idx = vq->last_used_idx; 137 uint16_t head_idx = vq->last_used_idx; 138 uint16_t head_flags = 0; 139 140 /* Split loop in two to save memory barriers */ 141 for (i = 0; i < vq->shadow_used_idx; i++) { 142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 144 145 used_idx += vq->shadow_used_packed[i].count; 146 if (used_idx >= vq->size) 147 used_idx -= vq->size; 148 } 149 150 /* The ordering for storing desc flags needs to be enforced. */ 151 rte_atomic_thread_fence(__ATOMIC_RELEASE); 152 153 for (i = 0; i < vq->shadow_used_idx; i++) { 154 uint16_t flags; 155 156 if (vq->shadow_used_packed[i].len) 157 flags = VRING_DESC_F_WRITE; 158 else 159 flags = 0; 160 161 if (vq->used_wrap_counter) { 162 flags |= VRING_DESC_F_USED; 163 flags |= VRING_DESC_F_AVAIL; 164 } else { 165 flags &= ~VRING_DESC_F_USED; 166 flags &= ~VRING_DESC_F_AVAIL; 167 } 168 169 if (i > 0) { 170 vq->desc_packed[vq->last_used_idx].flags = flags; 171 172 vhost_log_cache_used_vring(dev, vq, 173 vq->last_used_idx * 174 sizeof(struct vring_packed_desc), 175 sizeof(struct vring_packed_desc)); 176 } else { 177 head_idx = vq->last_used_idx; 178 head_flags = flags; 179 } 180 181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 182 } 183 184 vq->desc_packed[head_idx].flags = head_flags; 185 186 vhost_log_cache_used_vring(dev, vq, 187 head_idx * 188 sizeof(struct vring_packed_desc), 189 sizeof(struct vring_packed_desc)); 190 191 vq->shadow_used_idx = 0; 192 vhost_log_cache_sync(dev, vq); 193 } 194 195 static __rte_always_inline void 196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 197 struct vhost_virtqueue *vq) 198 { 199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 200 201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 202 /* desc flags is the synchronization point for virtio packed vring */ 203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 204 used_elem->flags, __ATOMIC_RELEASE); 205 206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 207 sizeof(struct vring_packed_desc), 208 sizeof(struct vring_packed_desc)); 209 vq->shadow_used_idx = 0; 210 vhost_log_cache_sync(dev, vq); 211 } 212 213 static __rte_always_inline void 214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 215 struct vhost_virtqueue *vq, 216 uint64_t *lens, 217 uint16_t *ids) 218 { 219 uint16_t i; 220 uint16_t flags; 221 uint16_t last_used_idx = vq->last_used_idx; 222 struct vring_packed_desc *desc_base = &vq->desc_packed[last_used_idx]; 223 224 if (vq->shadow_used_idx) { 225 do_data_copy_enqueue(dev, vq); 226 vhost_flush_enqueue_shadow_packed(dev, vq); 227 } 228 229 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 230 231 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 232 desc_base[i].id = ids[i]; 233 desc_base[i].len = lens[i]; 234 } 235 236 rte_atomic_thread_fence(__ATOMIC_RELEASE); 237 238 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 239 desc_base[i].flags = flags; 240 } 241 242 vhost_log_cache_used_vring(dev, vq, last_used_idx * 243 sizeof(struct vring_packed_desc), 244 sizeof(struct vring_packed_desc) * 245 PACKED_BATCH_SIZE); 246 vhost_log_cache_sync(dev, vq); 247 248 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 249 } 250 251 static 
__rte_always_inline void 252 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, 253 uint16_t id) 254 { 255 vq->shadow_used_packed[0].id = id; 256 257 if (!vq->shadow_used_idx) { 258 vq->shadow_last_used_idx = vq->last_used_idx; 259 vq->shadow_used_packed[0].flags = 260 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 261 vq->shadow_used_packed[0].len = 0; 262 vq->shadow_used_packed[0].count = 1; 263 vq->shadow_used_idx++; 264 } 265 266 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 267 } 268 269 static __rte_always_inline void 270 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 271 struct vhost_virtqueue *vq, 272 uint16_t *ids) 273 { 274 uint16_t flags; 275 uint16_t i; 276 uint16_t begin; 277 278 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 279 280 if (!vq->shadow_used_idx) { 281 vq->shadow_last_used_idx = vq->last_used_idx; 282 vq->shadow_used_packed[0].id = ids[0]; 283 vq->shadow_used_packed[0].len = 0; 284 vq->shadow_used_packed[0].count = 1; 285 vq->shadow_used_packed[0].flags = flags; 286 vq->shadow_used_idx++; 287 begin = 1; 288 } else 289 begin = 0; 290 291 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 292 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 293 vq->desc_packed[vq->last_used_idx + i].len = 0; 294 } 295 296 rte_atomic_thread_fence(__ATOMIC_RELEASE); 297 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 298 vq->desc_packed[vq->last_used_idx + i].flags = flags; 299 300 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 301 sizeof(struct vring_packed_desc), 302 sizeof(struct vring_packed_desc) * 303 PACKED_BATCH_SIZE); 304 vhost_log_cache_sync(dev, vq); 305 306 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 307 } 308 309 static __rte_always_inline void 310 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 311 uint16_t buf_id, 312 uint16_t count) 313 { 314 uint16_t flags; 315 316 flags = vq->desc_packed[vq->last_used_idx].flags; 317 if (vq->used_wrap_counter) { 318 flags |= VRING_DESC_F_USED; 319 flags |= VRING_DESC_F_AVAIL; 320 } else { 321 flags &= ~VRING_DESC_F_USED; 322 flags &= ~VRING_DESC_F_AVAIL; 323 } 324 325 if (!vq->shadow_used_idx) { 326 vq->shadow_last_used_idx = vq->last_used_idx; 327 328 vq->shadow_used_packed[0].id = buf_id; 329 vq->shadow_used_packed[0].len = 0; 330 vq->shadow_used_packed[0].flags = flags; 331 vq->shadow_used_idx++; 332 } else { 333 vq->desc_packed[vq->last_used_idx].id = buf_id; 334 vq->desc_packed[vq->last_used_idx].len = 0; 335 vq->desc_packed[vq->last_used_idx].flags = flags; 336 } 337 338 vq_inc_last_used_packed(vq, count); 339 } 340 341 static __rte_always_inline void 342 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 343 uint16_t buf_id, 344 uint16_t count) 345 { 346 uint16_t flags; 347 348 vq->shadow_used_packed[0].id = buf_id; 349 350 flags = vq->desc_packed[vq->last_used_idx].flags; 351 if (vq->used_wrap_counter) { 352 flags |= VRING_DESC_F_USED; 353 flags |= VRING_DESC_F_AVAIL; 354 } else { 355 flags &= ~VRING_DESC_F_USED; 356 flags &= ~VRING_DESC_F_AVAIL; 357 } 358 359 if (!vq->shadow_used_idx) { 360 vq->shadow_last_used_idx = vq->last_used_idx; 361 vq->shadow_used_packed[0].len = 0; 362 vq->shadow_used_packed[0].flags = flags; 363 vq->shadow_used_idx++; 364 } 365 366 vq_inc_last_used_packed(vq, count); 367 } 368 369 static __rte_always_inline void 370 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 371 uint32_t *len, 372 uint16_t *id, 373 uint16_t *count, 374 uint16_t num_buffers) 375 { 376 uint16_t i; 377 378 
for (i = 0; i < num_buffers; i++) { 379 /* enqueue shadow flush action aligned with batch num */ 380 if (!vq->shadow_used_idx) 381 vq->shadow_aligned_idx = vq->last_used_idx & 382 PACKED_BATCH_MASK; 383 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 384 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 385 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 386 vq->shadow_aligned_idx += count[i]; 387 vq->shadow_used_idx++; 388 } 389 } 390 391 static __rte_always_inline void 392 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 393 struct vhost_virtqueue *vq, 394 uint32_t *len, 395 uint16_t *id, 396 uint16_t *count, 397 uint16_t num_buffers) 398 { 399 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 400 401 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { 402 do_data_copy_enqueue(dev, vq); 403 vhost_flush_enqueue_shadow_packed(dev, vq); 404 } 405 } 406 407 /* avoid write operation when necessary, to lessen cache issues */ 408 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 409 if ((var) != (val)) \ 410 (var) = (val); \ 411 } while (0) 412 413 static __rte_always_inline void 414 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 415 { 416 uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; 417 418 if (m_buf->ol_flags & PKT_TX_TCP_SEG) 419 csum_l4 |= PKT_TX_TCP_CKSUM; 420 421 if (csum_l4) { 422 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 423 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 424 425 switch (csum_l4) { 426 case PKT_TX_TCP_CKSUM: 427 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 428 cksum)); 429 break; 430 case PKT_TX_UDP_CKSUM: 431 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 432 dgram_cksum)); 433 break; 434 case PKT_TX_SCTP_CKSUM: 435 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 436 cksum)); 437 break; 438 } 439 } else { 440 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 441 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 442 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 443 } 444 445 /* IP cksum verification cannot be bypassed, then calculate here */ 446 if (m_buf->ol_flags & PKT_TX_IP_CKSUM) { 447 struct rte_ipv4_hdr *ipv4_hdr; 448 449 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 450 m_buf->l2_len); 451 ipv4_hdr->hdr_checksum = 0; 452 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 453 } 454 455 if (m_buf->ol_flags & PKT_TX_TCP_SEG) { 456 if (m_buf->ol_flags & PKT_TX_IPV4) 457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 458 else 459 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 460 net_hdr->gso_size = m_buf->tso_segsz; 461 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 462 + m_buf->l4_len; 463 } else if (m_buf->ol_flags & PKT_TX_UDP_SEG) { 464 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 465 net_hdr->gso_size = m_buf->tso_segsz; 466 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 467 m_buf->l4_len; 468 } else { 469 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 470 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 471 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 472 } 473 } 474 475 static __rte_always_inline int 476 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 477 struct buf_vector *buf_vec, uint16_t *vec_idx, 478 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 479 { 480 uint16_t vec_id = *vec_idx; 481 482 while (desc_len) { 483 uint64_t desc_addr; 484 uint64_t desc_chunck_len = desc_len; 485 486 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 487 return -1; 488 489 desc_addr = vhost_iova_to_vva(dev, vq, 490 desc_iova, 491 &desc_chunck_len, 492 
perm); 493 if (unlikely(!desc_addr)) 494 return -1; 495 496 rte_prefetch0((void *)(uintptr_t)desc_addr); 497 498 buf_vec[vec_id].buf_iova = desc_iova; 499 buf_vec[vec_id].buf_addr = desc_addr; 500 buf_vec[vec_id].buf_len = desc_chunck_len; 501 502 desc_len -= desc_chunck_len; 503 desc_iova += desc_chunck_len; 504 vec_id++; 505 } 506 *vec_idx = vec_id; 507 508 return 0; 509 } 510 511 static __rte_always_inline int 512 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 513 uint32_t avail_idx, uint16_t *vec_idx, 514 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 515 uint32_t *desc_chain_len, uint8_t perm) 516 { 517 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 518 uint16_t vec_id = *vec_idx; 519 uint32_t len = 0; 520 uint64_t dlen; 521 uint32_t nr_descs = vq->size; 522 uint32_t cnt = 0; 523 struct vring_desc *descs = vq->desc; 524 struct vring_desc *idesc = NULL; 525 526 if (unlikely(idx >= vq->size)) 527 return -1; 528 529 *desc_chain_head = idx; 530 531 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 532 dlen = vq->desc[idx].len; 533 nr_descs = dlen / sizeof(struct vring_desc); 534 if (unlikely(nr_descs > vq->size)) 535 return -1; 536 537 descs = (struct vring_desc *)(uintptr_t) 538 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 539 &dlen, 540 VHOST_ACCESS_RO); 541 if (unlikely(!descs)) 542 return -1; 543 544 if (unlikely(dlen < vq->desc[idx].len)) { 545 /* 546 * The indirect desc table is not contiguous 547 * in process VA space, we have to copy it. 548 */ 549 idesc = vhost_alloc_copy_ind_table(dev, vq, 550 vq->desc[idx].addr, vq->desc[idx].len); 551 if (unlikely(!idesc)) 552 return -1; 553 554 descs = idesc; 555 } 556 557 idx = 0; 558 } 559 560 while (1) { 561 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 562 free_ind_table(idesc); 563 return -1; 564 } 565 566 dlen = descs[idx].len; 567 len += dlen; 568 569 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 570 descs[idx].addr, dlen, 571 perm))) { 572 free_ind_table(idesc); 573 return -1; 574 } 575 576 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 577 break; 578 579 idx = descs[idx].next; 580 } 581 582 *desc_chain_len = len; 583 *vec_idx = vec_id; 584 585 if (unlikely(!!idesc)) 586 free_ind_table(idesc); 587 588 return 0; 589 } 590 591 /* 592 * Returns -1 on fail, 0 on success 593 */ 594 static inline int 595 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 596 uint32_t size, struct buf_vector *buf_vec, 597 uint16_t *num_buffers, uint16_t avail_head, 598 uint16_t *nr_vec) 599 { 600 uint16_t cur_idx; 601 uint16_t vec_idx = 0; 602 uint16_t max_tries, tries = 0; 603 604 uint16_t head_idx = 0; 605 uint32_t len = 0; 606 607 *num_buffers = 0; 608 cur_idx = vq->last_avail_idx; 609 610 if (rxvq_is_mergeable(dev)) 611 max_tries = vq->size - 1; 612 else 613 max_tries = 1; 614 615 while (size > 0) { 616 if (unlikely(cur_idx == avail_head)) 617 return -1; 618 /* 619 * if we tried all available ring items, and still 620 * can't get enough buf, it means something abnormal 621 * happened. 
622 */ 623 if (unlikely(++tries > max_tries)) 624 return -1; 625 626 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 627 &vec_idx, buf_vec, 628 &head_idx, &len, 629 VHOST_ACCESS_RW) < 0)) 630 return -1; 631 len = RTE_MIN(len, size); 632 update_shadow_used_ring_split(vq, head_idx, len); 633 size -= len; 634 635 cur_idx++; 636 *num_buffers += 1; 637 } 638 639 *nr_vec = vec_idx; 640 641 return 0; 642 } 643 644 static __rte_always_inline int 645 fill_vec_buf_packed_indirect(struct virtio_net *dev, 646 struct vhost_virtqueue *vq, 647 struct vring_packed_desc *desc, uint16_t *vec_idx, 648 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 649 { 650 uint16_t i; 651 uint32_t nr_descs; 652 uint16_t vec_id = *vec_idx; 653 uint64_t dlen; 654 struct vring_packed_desc *descs, *idescs = NULL; 655 656 dlen = desc->len; 657 descs = (struct vring_packed_desc *)(uintptr_t) 658 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 659 if (unlikely(!descs)) 660 return -1; 661 662 if (unlikely(dlen < desc->len)) { 663 /* 664 * The indirect desc table is not contiguous 665 * in process VA space, we have to copy it. 666 */ 667 idescs = vhost_alloc_copy_ind_table(dev, 668 vq, desc->addr, desc->len); 669 if (unlikely(!idescs)) 670 return -1; 671 672 descs = idescs; 673 } 674 675 nr_descs = desc->len / sizeof(struct vring_packed_desc); 676 if (unlikely(nr_descs >= vq->size)) { 677 free_ind_table(idescs); 678 return -1; 679 } 680 681 for (i = 0; i < nr_descs; i++) { 682 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 683 free_ind_table(idescs); 684 return -1; 685 } 686 687 dlen = descs[i].len; 688 *len += dlen; 689 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 690 descs[i].addr, dlen, 691 perm))) 692 return -1; 693 } 694 *vec_idx = vec_id; 695 696 if (unlikely(!!idescs)) 697 free_ind_table(idescs); 698 699 return 0; 700 } 701 702 static __rte_always_inline int 703 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 704 uint16_t avail_idx, uint16_t *desc_count, 705 struct buf_vector *buf_vec, uint16_t *vec_idx, 706 uint16_t *buf_id, uint32_t *len, uint8_t perm) 707 { 708 bool wrap_counter = vq->avail_wrap_counter; 709 struct vring_packed_desc *descs = vq->desc_packed; 710 uint16_t vec_id = *vec_idx; 711 uint64_t dlen; 712 713 if (avail_idx < vq->last_avail_idx) 714 wrap_counter ^= 1; 715 716 /* 717 * Perform a load-acquire barrier in desc_is_avail to 718 * enforce the ordering between desc flags and desc 719 * content. 
720 */ 721 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 722 return -1; 723 724 *desc_count = 0; 725 *len = 0; 726 727 while (1) { 728 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 729 return -1; 730 731 if (unlikely(*desc_count >= vq->size)) 732 return -1; 733 734 *desc_count += 1; 735 *buf_id = descs[avail_idx].id; 736 737 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 738 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 739 &descs[avail_idx], 740 &vec_id, buf_vec, 741 len, perm) < 0)) 742 return -1; 743 } else { 744 dlen = descs[avail_idx].len; 745 *len += dlen; 746 747 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 748 descs[avail_idx].addr, 749 dlen, 750 perm))) 751 return -1; 752 } 753 754 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 755 break; 756 757 if (++avail_idx >= vq->size) { 758 avail_idx -= vq->size; 759 wrap_counter ^= 1; 760 } 761 } 762 763 *vec_idx = vec_id; 764 765 return 0; 766 } 767 768 static __rte_noinline void 769 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 770 struct buf_vector *buf_vec, 771 struct virtio_net_hdr_mrg_rxbuf *hdr) 772 { 773 uint64_t len; 774 uint64_t remain = dev->vhost_hlen; 775 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 776 uint64_t iova = buf_vec->buf_iova; 777 778 while (remain) { 779 len = RTE_MIN(remain, 780 buf_vec->buf_len); 781 dst = buf_vec->buf_addr; 782 rte_memcpy((void *)(uintptr_t)dst, 783 (void *)(uintptr_t)src, 784 len); 785 786 PRINT_PACKET(dev, (uintptr_t)dst, 787 (uint32_t)len, 0); 788 vhost_log_cache_write_iova(dev, vq, 789 iova, len); 790 791 remain -= len; 792 iova += len; 793 src += len; 794 buf_vec++; 795 } 796 } 797 798 static __rte_always_inline int 799 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 800 struct rte_mbuf *m, struct buf_vector *buf_vec, 801 uint16_t nr_vec, uint16_t num_buffers) 802 { 803 uint32_t vec_idx = 0; 804 uint32_t mbuf_offset, mbuf_avail; 805 uint32_t buf_offset, buf_avail; 806 uint64_t buf_addr, buf_iova, buf_len; 807 uint32_t cpy_len; 808 uint64_t hdr_addr; 809 struct rte_mbuf *hdr_mbuf; 810 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 811 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 812 int error = 0; 813 814 if (unlikely(m == NULL)) { 815 error = -1; 816 goto out; 817 } 818 819 buf_addr = buf_vec[vec_idx].buf_addr; 820 buf_iova = buf_vec[vec_idx].buf_iova; 821 buf_len = buf_vec[vec_idx].buf_len; 822 823 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 824 error = -1; 825 goto out; 826 } 827 828 hdr_mbuf = m; 829 hdr_addr = buf_addr; 830 if (unlikely(buf_len < dev->vhost_hlen)) { 831 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 832 hdr = &tmp_hdr; 833 } else 834 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 835 836 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 837 dev->vid, num_buffers); 838 839 if (unlikely(buf_len < dev->vhost_hlen)) { 840 buf_offset = dev->vhost_hlen - buf_len; 841 vec_idx++; 842 buf_addr = buf_vec[vec_idx].buf_addr; 843 buf_iova = buf_vec[vec_idx].buf_iova; 844 buf_len = buf_vec[vec_idx].buf_len; 845 buf_avail = buf_len - buf_offset; 846 } else { 847 buf_offset = dev->vhost_hlen; 848 buf_avail = buf_len - dev->vhost_hlen; 849 } 850 851 mbuf_avail = rte_pktmbuf_data_len(m); 852 mbuf_offset = 0; 853 while (mbuf_avail != 0 || m->next != NULL) { 854 /* done with current buf, get the next one */ 855 if (buf_avail == 0) { 856 vec_idx++; 857 if (unlikely(vec_idx >= nr_vec)) { 858 error = -1; 859 goto out; 860 } 861 862 
buf_addr = buf_vec[vec_idx].buf_addr; 863 buf_iova = buf_vec[vec_idx].buf_iova; 864 buf_len = buf_vec[vec_idx].buf_len; 865 866 buf_offset = 0; 867 buf_avail = buf_len; 868 } 869 870 /* done with current mbuf, get the next one */ 871 if (mbuf_avail == 0) { 872 m = m->next; 873 874 mbuf_offset = 0; 875 mbuf_avail = rte_pktmbuf_data_len(m); 876 } 877 878 if (hdr_addr) { 879 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 880 if (rxvq_is_mergeable(dev)) 881 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 882 num_buffers); 883 884 if (unlikely(hdr == &tmp_hdr)) { 885 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 886 } else { 887 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 888 dev->vhost_hlen, 0); 889 vhost_log_cache_write_iova(dev, vq, 890 buf_vec[0].buf_iova, 891 dev->vhost_hlen); 892 } 893 894 hdr_addr = 0; 895 } 896 897 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 898 899 if (likely(cpy_len > MAX_BATCH_LEN || 900 vq->batch_copy_nb_elems >= vq->size)) { 901 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), 902 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 903 cpy_len); 904 vhost_log_cache_write_iova(dev, vq, 905 buf_iova + buf_offset, 906 cpy_len); 907 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), 908 cpy_len, 0); 909 } else { 910 batch_copy[vq->batch_copy_nb_elems].dst = 911 (void *)((uintptr_t)(buf_addr + buf_offset)); 912 batch_copy[vq->batch_copy_nb_elems].src = 913 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 914 batch_copy[vq->batch_copy_nb_elems].log_addr = 915 buf_iova + buf_offset; 916 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 917 vq->batch_copy_nb_elems++; 918 } 919 920 mbuf_avail -= cpy_len; 921 mbuf_offset += cpy_len; 922 buf_avail -= cpy_len; 923 buf_offset += cpy_len; 924 } 925 926 out: 927 928 return error; 929 } 930 931 static __rte_always_inline void 932 async_fill_vec(struct iovec *v, void *base, size_t len) 933 { 934 v->iov_base = base; 935 v->iov_len = len; 936 } 937 938 static __rte_always_inline void 939 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count, 940 struct iovec *vec, unsigned long nr_seg) 941 { 942 it->offset = 0; 943 it->count = count; 944 945 if (count) { 946 it->iov = vec; 947 it->nr_segs = nr_seg; 948 } else { 949 it->iov = 0; 950 it->nr_segs = 0; 951 } 952 } 953 954 static __rte_always_inline void 955 async_fill_desc(struct rte_vhost_async_desc *desc, 956 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst) 957 { 958 desc->src = src; 959 desc->dst = dst; 960 } 961 962 static __rte_always_inline int 963 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 964 struct rte_mbuf *m, struct buf_vector *buf_vec, 965 uint16_t nr_vec, uint16_t num_buffers, 966 struct iovec *src_iovec, struct iovec *dst_iovec, 967 struct rte_vhost_iov_iter *src_it, 968 struct rte_vhost_iov_iter *dst_it) 969 { 970 uint32_t vec_idx = 0; 971 uint32_t mbuf_offset, mbuf_avail; 972 uint32_t buf_offset, buf_avail; 973 uint64_t buf_addr, buf_iova, buf_len; 974 uint32_t cpy_len, cpy_threshold; 975 uint64_t hdr_addr; 976 struct rte_mbuf *hdr_mbuf; 977 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 978 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 979 int error = 0; 980 uint64_t mapped_len; 981 982 uint32_t tlen = 0; 983 int tvec_idx = 0; 984 void *hpa; 985 986 if (unlikely(m == NULL)) { 987 error = -1; 988 goto out; 989 } 990 991 cpy_threshold = vq->async_threshold; 992 993 buf_addr = buf_vec[vec_idx].buf_addr; 994 buf_iova = buf_vec[vec_idx].buf_iova; 995 buf_len = buf_vec[vec_idx].buf_len; 996 997 if (unlikely(buf_len < 
dev->vhost_hlen && nr_vec <= 1)) { 998 error = -1; 999 goto out; 1000 } 1001 1002 hdr_mbuf = m; 1003 hdr_addr = buf_addr; 1004 if (unlikely(buf_len < dev->vhost_hlen)) { 1005 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1006 hdr = &tmp_hdr; 1007 } else 1008 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1009 1010 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 1011 dev->vid, num_buffers); 1012 1013 if (unlikely(buf_len < dev->vhost_hlen)) { 1014 buf_offset = dev->vhost_hlen - buf_len; 1015 vec_idx++; 1016 buf_addr = buf_vec[vec_idx].buf_addr; 1017 buf_iova = buf_vec[vec_idx].buf_iova; 1018 buf_len = buf_vec[vec_idx].buf_len; 1019 buf_avail = buf_len - buf_offset; 1020 } else { 1021 buf_offset = dev->vhost_hlen; 1022 buf_avail = buf_len - dev->vhost_hlen; 1023 } 1024 1025 mbuf_avail = rte_pktmbuf_data_len(m); 1026 mbuf_offset = 0; 1027 1028 while (mbuf_avail != 0 || m->next != NULL) { 1029 /* done with current buf, get the next one */ 1030 if (buf_avail == 0) { 1031 vec_idx++; 1032 if (unlikely(vec_idx >= nr_vec)) { 1033 error = -1; 1034 goto out; 1035 } 1036 1037 buf_addr = buf_vec[vec_idx].buf_addr; 1038 buf_iova = buf_vec[vec_idx].buf_iova; 1039 buf_len = buf_vec[vec_idx].buf_len; 1040 1041 buf_offset = 0; 1042 buf_avail = buf_len; 1043 } 1044 1045 /* done with current mbuf, get the next one */ 1046 if (mbuf_avail == 0) { 1047 m = m->next; 1048 1049 mbuf_offset = 0; 1050 mbuf_avail = rte_pktmbuf_data_len(m); 1051 } 1052 1053 if (hdr_addr) { 1054 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1055 if (rxvq_is_mergeable(dev)) 1056 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1057 num_buffers); 1058 1059 if (unlikely(hdr == &tmp_hdr)) { 1060 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1061 } else { 1062 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1063 dev->vhost_hlen, 0); 1064 vhost_log_cache_write_iova(dev, vq, 1065 buf_vec[0].buf_iova, 1066 dev->vhost_hlen); 1067 } 1068 1069 hdr_addr = 0; 1070 } 1071 1072 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1073 1074 while (unlikely(cpy_len && cpy_len >= cpy_threshold)) { 1075 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1076 buf_iova + buf_offset, 1077 cpy_len, &mapped_len); 1078 1079 if (unlikely(!hpa || mapped_len < cpy_threshold)) 1080 break; 1081 1082 async_fill_vec(src_iovec + tvec_idx, 1083 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 1084 mbuf_offset), (size_t)mapped_len); 1085 1086 async_fill_vec(dst_iovec + tvec_idx, 1087 hpa, (size_t)mapped_len); 1088 1089 tlen += (uint32_t)mapped_len; 1090 cpy_len -= (uint32_t)mapped_len; 1091 mbuf_avail -= (uint32_t)mapped_len; 1092 mbuf_offset += (uint32_t)mapped_len; 1093 buf_avail -= (uint32_t)mapped_len; 1094 buf_offset += (uint32_t)mapped_len; 1095 tvec_idx++; 1096 } 1097 1098 if (likely(cpy_len)) { 1099 if (unlikely(vq->batch_copy_nb_elems >= vq->size)) { 1100 rte_memcpy( 1101 (void *)((uintptr_t)(buf_addr + buf_offset)), 1102 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 1103 cpy_len); 1104 1105 PRINT_PACKET(dev, 1106 (uintptr_t)(buf_addr + buf_offset), 1107 cpy_len, 0); 1108 } else { 1109 batch_copy[vq->batch_copy_nb_elems].dst = 1110 (void *)((uintptr_t)(buf_addr + buf_offset)); 1111 batch_copy[vq->batch_copy_nb_elems].src = 1112 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 1113 batch_copy[vq->batch_copy_nb_elems].log_addr = 1114 buf_iova + buf_offset; 1115 batch_copy[vq->batch_copy_nb_elems].len = 1116 cpy_len; 1117 vq->batch_copy_nb_elems++; 1118 } 1119 1120 mbuf_avail -= cpy_len; 1121 mbuf_offset += cpy_len; 1122 buf_avail -= cpy_len; 1123 buf_offset += 
cpy_len; 1124 } 1125 1126 } 1127 1128 out: 1129 if (tlen) { 1130 async_fill_iter(src_it, tlen, src_iovec, tvec_idx); 1131 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx); 1132 } else { 1133 src_it->count = 0; 1134 } 1135 1136 return error; 1137 } 1138 1139 static __rte_always_inline int 1140 vhost_enqueue_single_packed(struct virtio_net *dev, 1141 struct vhost_virtqueue *vq, 1142 struct rte_mbuf *pkt, 1143 struct buf_vector *buf_vec, 1144 uint16_t *nr_descs) 1145 { 1146 uint16_t nr_vec = 0; 1147 uint16_t avail_idx = vq->last_avail_idx; 1148 uint16_t max_tries, tries = 0; 1149 uint16_t buf_id = 0; 1150 uint32_t len = 0; 1151 uint16_t desc_count; 1152 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1153 uint16_t num_buffers = 0; 1154 uint32_t buffer_len[vq->size]; 1155 uint16_t buffer_buf_id[vq->size]; 1156 uint16_t buffer_desc_count[vq->size]; 1157 1158 if (rxvq_is_mergeable(dev)) 1159 max_tries = vq->size - 1; 1160 else 1161 max_tries = 1; 1162 1163 while (size > 0) { 1164 /* 1165 * if we tried all available ring items, and still 1166 * can't get enough buf, it means something abnormal 1167 * happened. 1168 */ 1169 if (unlikely(++tries > max_tries)) 1170 return -1; 1171 1172 if (unlikely(fill_vec_buf_packed(dev, vq, 1173 avail_idx, &desc_count, 1174 buf_vec, &nr_vec, 1175 &buf_id, &len, 1176 VHOST_ACCESS_RW) < 0)) 1177 return -1; 1178 1179 len = RTE_MIN(len, size); 1180 size -= len; 1181 1182 buffer_len[num_buffers] = len; 1183 buffer_buf_id[num_buffers] = buf_id; 1184 buffer_desc_count[num_buffers] = desc_count; 1185 num_buffers += 1; 1186 1187 *nr_descs += desc_count; 1188 avail_idx += desc_count; 1189 if (avail_idx >= vq->size) 1190 avail_idx -= vq->size; 1191 } 1192 1193 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0) 1194 return -1; 1195 1196 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1197 buffer_desc_count, num_buffers); 1198 1199 return 0; 1200 } 1201 1202 static __rte_noinline uint32_t 1203 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1204 struct rte_mbuf **pkts, uint32_t count) 1205 { 1206 uint32_t pkt_idx = 0; 1207 uint16_t num_buffers; 1208 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1209 uint16_t avail_head; 1210 1211 /* 1212 * The ordering between avail index and 1213 * desc reads needs to be enforced. 
1214 */ 1215 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1216 1217 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1218 1219 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1220 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1221 uint16_t nr_vec = 0; 1222 1223 if (unlikely(reserve_avail_buf_split(dev, vq, 1224 pkt_len, buf_vec, &num_buffers, 1225 avail_head, &nr_vec) < 0)) { 1226 VHOST_LOG_DATA(DEBUG, 1227 "(%d) failed to get enough desc from vring\n", 1228 dev->vid); 1229 vq->shadow_used_idx -= num_buffers; 1230 break; 1231 } 1232 1233 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1234 dev->vid, vq->last_avail_idx, 1235 vq->last_avail_idx + num_buffers); 1236 1237 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], 1238 buf_vec, nr_vec, 1239 num_buffers) < 0) { 1240 vq->shadow_used_idx -= num_buffers; 1241 break; 1242 } 1243 1244 vq->last_avail_idx += num_buffers; 1245 } 1246 1247 do_data_copy_enqueue(dev, vq); 1248 1249 if (likely(vq->shadow_used_idx)) { 1250 flush_shadow_used_ring_split(dev, vq); 1251 vhost_vring_call_split(dev, vq); 1252 } 1253 1254 return pkt_idx; 1255 } 1256 1257 static __rte_always_inline int 1258 virtio_dev_rx_batch_packed(struct virtio_net *dev, 1259 struct vhost_virtqueue *vq, 1260 struct rte_mbuf **pkts) 1261 { 1262 bool wrap_counter = vq->avail_wrap_counter; 1263 struct vring_packed_desc *descs = vq->desc_packed; 1264 uint16_t avail_idx = vq->last_avail_idx; 1265 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1266 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1267 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1268 uint64_t lens[PACKED_BATCH_SIZE]; 1269 uint16_t ids[PACKED_BATCH_SIZE]; 1270 uint16_t i; 1271 1272 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1273 return -1; 1274 1275 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1276 return -1; 1277 1278 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1279 if (unlikely(pkts[i]->next != NULL)) 1280 return -1; 1281 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1282 wrap_counter))) 1283 return -1; 1284 } 1285 1286 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1287 lens[i] = descs[avail_idx + i].len; 1288 1289 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1290 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1291 return -1; 1292 } 1293 1294 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1295 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1296 descs[avail_idx + i].addr, 1297 &lens[i], 1298 VHOST_ACCESS_RW); 1299 1300 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1301 if (unlikely(!desc_addrs[i])) 1302 return -1; 1303 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1304 return -1; 1305 } 1306 1307 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1308 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1309 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1310 (uintptr_t)desc_addrs[i]; 1311 lens[i] = pkts[i]->pkt_len + 1312 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1313 } 1314 1315 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1316 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1317 1318 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1319 1320 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1321 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1322 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1323 pkts[i]->pkt_len); 1324 } 1325 1326 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1327 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1328 
lens[i]); 1329 1330 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1331 ids[i] = descs[avail_idx + i].id; 1332 1333 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1334 1335 return 0; 1336 } 1337 1338 static __rte_always_inline int16_t 1339 virtio_dev_rx_single_packed(struct virtio_net *dev, 1340 struct vhost_virtqueue *vq, 1341 struct rte_mbuf *pkt) 1342 { 1343 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1344 uint16_t nr_descs = 0; 1345 1346 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1347 &nr_descs) < 0)) { 1348 VHOST_LOG_DATA(DEBUG, 1349 "(%d) failed to get enough desc from vring\n", 1350 dev->vid); 1351 return -1; 1352 } 1353 1354 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1355 dev->vid, vq->last_avail_idx, 1356 vq->last_avail_idx + nr_descs); 1357 1358 vq_inc_last_avail_packed(vq, nr_descs); 1359 1360 return 0; 1361 } 1362 1363 static __rte_noinline uint32_t 1364 virtio_dev_rx_packed(struct virtio_net *dev, 1365 struct vhost_virtqueue *__rte_restrict vq, 1366 struct rte_mbuf **__rte_restrict pkts, 1367 uint32_t count) 1368 { 1369 uint32_t pkt_idx = 0; 1370 1371 do { 1372 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1373 1374 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1375 if (!virtio_dev_rx_batch_packed(dev, vq, 1376 &pkts[pkt_idx])) { 1377 pkt_idx += PACKED_BATCH_SIZE; 1378 continue; 1379 } 1380 } 1381 1382 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1383 break; 1384 pkt_idx++; 1385 1386 } while (pkt_idx < count); 1387 1388 if (vq->shadow_used_idx) { 1389 do_data_copy_enqueue(dev, vq); 1390 vhost_flush_enqueue_shadow_packed(dev, vq); 1391 } 1392 1393 if (pkt_idx) 1394 vhost_vring_call_packed(dev, vq); 1395 1396 return pkt_idx; 1397 } 1398 1399 static __rte_always_inline uint32_t 1400 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1401 struct rte_mbuf **pkts, uint32_t count) 1402 { 1403 struct vhost_virtqueue *vq; 1404 uint32_t nb_tx = 0; 1405 1406 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 1407 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1408 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 1409 dev->vid, __func__, queue_id); 1410 return 0; 1411 } 1412 1413 vq = dev->virtqueue[queue_id]; 1414 1415 rte_spinlock_lock(&vq->access_lock); 1416 1417 if (unlikely(!vq->enabled)) 1418 goto out_access_unlock; 1419 1420 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1421 vhost_user_iotlb_rd_lock(vq); 1422 1423 if (unlikely(!vq->access_ok)) 1424 if (unlikely(vring_translate(dev, vq) < 0)) 1425 goto out; 1426 1427 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1428 if (count == 0) 1429 goto out; 1430 1431 if (vq_is_packed(dev)) 1432 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1433 else 1434 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1435 1436 out: 1437 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1438 vhost_user_iotlb_rd_unlock(vq); 1439 1440 out_access_unlock: 1441 rte_spinlock_unlock(&vq->access_lock); 1442 1443 return nb_tx; 1444 } 1445 1446 uint16_t 1447 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1448 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1449 { 1450 struct virtio_net *dev = get_device(vid); 1451 1452 if (!dev) 1453 return 0; 1454 1455 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1456 VHOST_LOG_DATA(ERR, 1457 "(%d) %s: built-in vhost net backend is disabled.\n", 1458 dev->vid, __func__); 1459 return 0; 1460 } 1461 1462 return virtio_dev_rx(dev, queue_id, pkts, count); 1463 } 1464 1465 
static __rte_always_inline uint16_t 1466 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx, 1467 uint16_t vq_size, uint16_t n_inflight) 1468 { 1469 return pkts_idx > n_inflight ? (pkts_idx - n_inflight) : 1470 (vq_size - n_inflight + pkts_idx) & (vq_size - 1); 1471 } 1472 1473 static __rte_always_inline void 1474 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1475 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1476 { 1477 size_t elem_size = sizeof(struct vring_used_elem); 1478 1479 if (d_idx + count <= ring_size) { 1480 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1481 } else { 1482 uint16_t size = ring_size - d_idx; 1483 1484 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1485 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1486 } 1487 } 1488 1489 static __rte_always_inline void 1490 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1491 struct vring_used_elem_packed *d_ring, 1492 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1493 { 1494 size_t elem_size = sizeof(struct vring_used_elem_packed); 1495 1496 if (d_idx + count <= ring_size) { 1497 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1498 } else { 1499 uint16_t size = ring_size - d_idx; 1500 1501 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1502 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1503 } 1504 } 1505 1506 static __rte_noinline uint32_t 1507 virtio_dev_rx_async_submit_split(struct virtio_net *dev, 1508 struct vhost_virtqueue *vq, uint16_t queue_id, 1509 struct rte_mbuf **pkts, uint32_t count, 1510 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 1511 { 1512 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1513 uint16_t num_buffers; 1514 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1515 uint16_t avail_head; 1516 1517 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1518 struct iovec *vec_pool = vq->vec_pool; 1519 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1520 struct iovec *src_iovec = vec_pool; 1521 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1522 uint16_t slot_idx = 0; 1523 uint16_t segs_await = 0; 1524 uint16_t iovec_idx = 0, it_idx = 0; 1525 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1526 uint32_t n_pkts = 0, pkt_err = 0; 1527 uint32_t num_async_pkts = 0, num_done_pkts = 0; 1528 struct { 1529 uint16_t pkt_idx; 1530 uint16_t last_avail_idx; 1531 } async_pkts_log[MAX_PKT_BURST]; 1532 1533 /* 1534 * The ordering between avail index and desc reads need to be enforced. 
1535 */ 1536 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1537 1538 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1539 1540 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1541 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1542 uint16_t nr_vec = 0; 1543 1544 if (unlikely(reserve_avail_buf_split(dev, vq, 1545 pkt_len, buf_vec, &num_buffers, 1546 avail_head, &nr_vec) < 0)) { 1547 VHOST_LOG_DATA(DEBUG, 1548 "(%d) failed to get enough desc from vring\n", 1549 dev->vid); 1550 vq->shadow_used_idx -= num_buffers; 1551 break; 1552 } 1553 1554 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1555 dev->vid, vq->last_avail_idx, 1556 vq->last_avail_idx + num_buffers); 1557 1558 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, 1559 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1560 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) { 1561 vq->shadow_used_idx -= num_buffers; 1562 break; 1563 } 1564 1565 slot_idx = (vq->async_pkts_idx + num_async_pkts) & 1566 (vq->size - 1); 1567 if (it_pool[it_idx].count) { 1568 uint16_t from, to; 1569 1570 async_fill_desc(&tdes[pkt_burst_idx++], 1571 &it_pool[it_idx], &it_pool[it_idx + 1]); 1572 pkts_info[slot_idx].descs = num_buffers; 1573 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1574 async_pkts_log[num_async_pkts].pkt_idx = pkt_idx; 1575 async_pkts_log[num_async_pkts++].last_avail_idx = 1576 vq->last_avail_idx; 1577 1578 iovec_idx += it_pool[it_idx].nr_segs; 1579 it_idx += 2; 1580 1581 segs_await += it_pool[it_idx].nr_segs; 1582 1583 /** 1584 * recover shadow used ring and keep DMA-occupied 1585 * descriptors. 1586 */ 1587 from = vq->shadow_used_idx - num_buffers; 1588 to = vq->async_desc_idx_split & (vq->size - 1); 1589 1590 store_dma_desc_info_split(vq->shadow_used_split, 1591 vq->async_descs_split, vq->size, from, to, num_buffers); 1592 1593 vq->async_desc_idx_split += num_buffers; 1594 vq->shadow_used_idx -= num_buffers; 1595 } else 1596 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 1597 1598 vq->last_avail_idx += num_buffers; 1599 1600 /* 1601 * conditions to trigger async device transfer: 1602 * - buffered packet number reaches transfer threshold 1603 * - unused async iov number is less than max vhost vector 1604 */ 1605 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1606 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < 1607 BUF_VECTOR_MAX))) { 1608 n_pkts = vq->async_ops.transfer_data(dev->vid, 1609 queue_id, tdes, 0, pkt_burst_idx); 1610 iovec_idx = 0; 1611 it_idx = 0; 1612 1613 segs_await = 0; 1614 vq->async_pkts_inflight_n += n_pkts; 1615 1616 if (unlikely(n_pkts < pkt_burst_idx)) { 1617 /* 1618 * log error packets number here and do actual 1619 * error processing when applications poll 1620 * completion 1621 */ 1622 pkt_err = pkt_burst_idx - n_pkts; 1623 pkt_burst_idx = 0; 1624 break; 1625 } 1626 1627 pkt_burst_idx = 0; 1628 } 1629 } 1630 1631 if (pkt_burst_idx) { 1632 n_pkts = vq->async_ops.transfer_data(dev->vid, 1633 queue_id, tdes, 0, pkt_burst_idx); 1634 vq->async_pkts_inflight_n += n_pkts; 1635 1636 if (unlikely(n_pkts < pkt_burst_idx)) 1637 pkt_err = pkt_burst_idx - n_pkts; 1638 } 1639 1640 do_data_copy_enqueue(dev, vq); 1641 1642 if (unlikely(pkt_err)) { 1643 uint16_t num_descs = 0; 1644 1645 num_async_pkts -= pkt_err; 1646 /* calculate the sum of descriptors of DMA-error packets. 
*/ 1647 while (pkt_err-- > 0) { 1648 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1649 slot_idx--; 1650 } 1651 vq->async_desc_idx_split -= num_descs; 1652 /* recover shadow used ring and available ring */ 1653 vq->shadow_used_idx -= (vq->last_avail_idx - 1654 async_pkts_log[num_async_pkts].last_avail_idx - 1655 num_descs); 1656 vq->last_avail_idx = 1657 async_pkts_log[num_async_pkts].last_avail_idx; 1658 pkt_idx = async_pkts_log[num_async_pkts].pkt_idx; 1659 num_done_pkts = pkt_idx - num_async_pkts; 1660 } 1661 1662 vq->async_pkts_idx += num_async_pkts; 1663 *comp_count = num_done_pkts; 1664 1665 if (likely(vq->shadow_used_idx)) { 1666 flush_shadow_used_ring_split(dev, vq); 1667 vhost_vring_call_split(dev, vq); 1668 } 1669 1670 return pkt_idx; 1671 } 1672 1673 static __rte_always_inline void 1674 vhost_update_used_packed(struct vhost_virtqueue *vq, 1675 struct vring_used_elem_packed *shadow_ring, 1676 uint16_t count) 1677 { 1678 int i; 1679 uint16_t used_idx = vq->last_used_idx; 1680 uint16_t head_idx = vq->last_used_idx; 1681 uint16_t head_flags = 0; 1682 1683 if (count == 0) 1684 return; 1685 1686 /* Split loop in two to save memory barriers */ 1687 for (i = 0; i < count; i++) { 1688 vq->desc_packed[used_idx].id = shadow_ring[i].id; 1689 vq->desc_packed[used_idx].len = shadow_ring[i].len; 1690 1691 used_idx += shadow_ring[i].count; 1692 if (used_idx >= vq->size) 1693 used_idx -= vq->size; 1694 } 1695 1696 /* The ordering for storing desc flags needs to be enforced. */ 1697 rte_atomic_thread_fence(__ATOMIC_RELEASE); 1698 1699 for (i = 0; i < count; i++) { 1700 uint16_t flags; 1701 1702 if (vq->shadow_used_packed[i].len) 1703 flags = VRING_DESC_F_WRITE; 1704 else 1705 flags = 0; 1706 1707 if (vq->used_wrap_counter) { 1708 flags |= VRING_DESC_F_USED; 1709 flags |= VRING_DESC_F_AVAIL; 1710 } else { 1711 flags &= ~VRING_DESC_F_USED; 1712 flags &= ~VRING_DESC_F_AVAIL; 1713 } 1714 1715 if (i > 0) { 1716 vq->desc_packed[vq->last_used_idx].flags = flags; 1717 } else { 1718 head_idx = vq->last_used_idx; 1719 head_flags = flags; 1720 } 1721 1722 vq_inc_last_used_packed(vq, shadow_ring[i].count); 1723 } 1724 1725 vq->desc_packed[head_idx].flags = head_flags; 1726 } 1727 1728 static __rte_always_inline int 1729 virtio_dev_rx_async_batch_packed(struct virtio_net *dev, 1730 struct vhost_virtqueue *vq, 1731 struct rte_mbuf **pkts, 1732 struct rte_mbuf **comp_pkts, uint32_t *pkt_done) 1733 { 1734 uint16_t i; 1735 uint32_t cpy_threshold = vq->async_threshold; 1736 1737 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1738 if (unlikely(pkts[i]->pkt_len >= cpy_threshold)) 1739 return -1; 1740 } 1741 if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) { 1742 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1743 comp_pkts[(*pkt_done)++] = pkts[i]; 1744 1745 return 0; 1746 } 1747 1748 return -1; 1749 } 1750 1751 static __rte_always_inline int 1752 vhost_enqueue_async_single_packed(struct virtio_net *dev, 1753 struct vhost_virtqueue *vq, 1754 struct rte_mbuf *pkt, 1755 struct buf_vector *buf_vec, 1756 uint16_t *nr_descs, 1757 uint16_t *nr_buffers, 1758 struct vring_packed_desc *async_descs, 1759 struct iovec *src_iovec, struct iovec *dst_iovec, 1760 struct rte_vhost_iov_iter *src_it, 1761 struct rte_vhost_iov_iter *dst_it) 1762 { 1763 uint16_t nr_vec = 0; 1764 uint16_t avail_idx = vq->last_avail_idx; 1765 uint16_t max_tries, tries = 0; 1766 uint16_t buf_id = 0; 1767 uint32_t len = 0; 1768 uint16_t desc_count = 0; 1769 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1770 
uint32_t buffer_len[vq->size]; 1771 uint16_t buffer_buf_id[vq->size]; 1772 uint16_t buffer_desc_count[vq->size]; 1773 1774 if (rxvq_is_mergeable(dev)) 1775 max_tries = vq->size - 1; 1776 else 1777 max_tries = 1; 1778 1779 while (size > 0) { 1780 /* 1781 * if we tried all available ring items, and still 1782 * can't get enough buf, it means something abnormal 1783 * happened. 1784 */ 1785 if (unlikely(++tries > max_tries)) 1786 return -1; 1787 1788 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec, 1789 &buf_id, &len, VHOST_ACCESS_RW) < 0)) 1790 return -1; 1791 1792 len = RTE_MIN(len, size); 1793 size -= len; 1794 1795 buffer_len[*nr_buffers] = len; 1796 buffer_buf_id[*nr_buffers] = buf_id; 1797 buffer_desc_count[*nr_buffers] = desc_count; 1798 *nr_buffers += 1; 1799 1800 *nr_descs += desc_count; 1801 avail_idx += desc_count; 1802 if (avail_idx >= vq->size) 1803 avail_idx -= vq->size; 1804 } 1805 1806 if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec, 1807 src_it, dst_it) < 0) 1808 return -1; 1809 /* store descriptors for DMA */ 1810 if (avail_idx >= *nr_descs) { 1811 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1812 *nr_descs * sizeof(struct vring_packed_desc)); 1813 } else { 1814 uint16_t nr_copy = vq->size - vq->last_avail_idx; 1815 1816 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx], 1817 nr_copy * sizeof(struct vring_packed_desc)); 1818 rte_memcpy(async_descs + nr_copy, vq->desc_packed, 1819 (*nr_descs - nr_copy) * sizeof(struct vring_packed_desc)); 1820 } 1821 1822 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1823 1824 return 0; 1825 } 1826 1827 static __rte_always_inline int16_t 1828 virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1829 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers, 1830 struct vring_packed_desc *async_descs, 1831 struct iovec *src_iovec, struct iovec *dst_iovec, 1832 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it) 1833 { 1834 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1835 1836 if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers, 1837 async_descs, src_iovec, dst_iovec, 1838 src_it, dst_it) < 0)) { 1839 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); 1840 return -1; 1841 } 1842 1843 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1844 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1845 1846 return 0; 1847 } 1848 1849 static __rte_always_inline void 1850 dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs, 1851 uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err, 1852 uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts) 1853 { 1854 uint16_t descs_err = 0; 1855 uint16_t buffers_err = 0; 1856 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1857 1858 *num_async_pkts -= nr_err; 1859 *pkt_idx -= nr_err; 1860 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1861 while (nr_err-- > 0) { 1862 descs_err += pkts_info[slot_idx % vq->size].descs; 1863 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1864 slot_idx--; 1865 } 1866 1867 vq->async_buffer_idx_packed -= buffers_err; 1868 1869 if (vq->last_avail_idx >= descs_err) { 1870 vq->last_avail_idx -= descs_err; 1871 1872 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1873 &async_descs[async_descs_idx - descs_err], 1874 descs_err * sizeof(struct vring_packed_desc)); 1875 } else { 1876 uint16_t nr_copy; 1877 1878 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1879 nr_copy = vq->size - vq->last_avail_idx; 1880 rte_memcpy(&vq->desc_packed[vq->last_avail_idx], 1881 &async_descs[async_descs_idx - descs_err], 1882 nr_copy * sizeof(struct vring_packed_desc)); 1883 descs_err -= nr_copy; 1884 rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err], 1885 descs_err * sizeof(struct vring_packed_desc)); 1886 vq->avail_wrap_counter ^= 1; 1887 } 1888 1889 *num_done_pkts = *pkt_idx - *num_async_pkts; 1890 } 1891 1892 static __rte_noinline uint32_t 1893 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, 1894 struct vhost_virtqueue *vq, uint16_t queue_id, 1895 struct rte_mbuf **pkts, uint32_t count, 1896 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 1897 { 1898 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1899 uint32_t remained = count; 1900 uint16_t async_descs_idx = 0; 1901 uint16_t num_buffers; 1902 uint16_t num_descs; 1903 1904 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1905 struct iovec *vec_pool = vq->vec_pool; 1906 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1907 struct iovec *src_iovec = vec_pool; 1908 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1909 uint16_t slot_idx = 0; 1910 uint16_t segs_await = 0; 1911 uint16_t iovec_idx = 0, it_idx = 0; 1912 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1913 uint32_t n_pkts = 0, pkt_err = 0; 1914 uint32_t num_async_pkts = 0, num_done_pkts = 0; 1915 struct vring_packed_desc async_descs[vq->size]; 1916 1917 do { 1918 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1919 if (remained >= PACKED_BATCH_SIZE) { 1920 if (!virtio_dev_rx_async_batch_packed(dev, vq, 1921 &pkts[pkt_idx], comp_pkts, &num_done_pkts)) { 1922 pkt_idx += PACKED_BATCH_SIZE; 1923 remained -= PACKED_BATCH_SIZE; 1924 continue; 1925 } 1926 } 1927 1928 num_buffers = 0; 1929 num_descs = 0; 1930 if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx], 1931 &num_descs, &num_buffers, 1932 &async_descs[async_descs_idx], 1933 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1934 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0)) 1935 break; 1936 1937 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1938 dev->vid, vq->last_avail_idx, 1939 vq->last_avail_idx + num_descs); 1940 1941 slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size; 1942 if (it_pool[it_idx].count) { 1943 uint16_t from, to; 1944 1945 async_descs_idx += num_descs; 1946 async_fill_desc(&tdes[pkt_burst_idx++], 1947 &it_pool[it_idx], &it_pool[it_idx + 1]); 1948 pkts_info[slot_idx].descs = num_descs; 1949 pkts_info[slot_idx].nr_buffers = num_buffers; 1950 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1951 num_async_pkts++; 1952 iovec_idx += it_pool[it_idx].nr_segs; 1953 it_idx += 2; 1954 1955 segs_await += it_pool[it_idx].nr_segs; 1956 1957 /** 1958 * recover shadow used ring and keep DMA-occupied 1959 * descriptors. 
1960 */ 1961 from = vq->shadow_used_idx - num_buffers; 1962 to = vq->async_buffer_idx_packed % vq->size; 1963 store_dma_desc_info_packed(vq->shadow_used_packed, 1964 vq->async_buffers_packed, vq->size, from, to, num_buffers); 1965 1966 vq->async_buffer_idx_packed += num_buffers; 1967 vq->shadow_used_idx -= num_buffers; 1968 } else { 1969 comp_pkts[num_done_pkts++] = pkts[pkt_idx]; 1970 } 1971 1972 pkt_idx++; 1973 remained--; 1974 vq_inc_last_avail_packed(vq, num_descs); 1975 1976 /* 1977 * conditions to trigger async device transfer: 1978 * - buffered packet number reaches transfer threshold 1979 * - unused async iov number is less than max vhost vector 1980 */ 1981 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1982 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) { 1983 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, 1984 tdes, 0, pkt_burst_idx); 1985 iovec_idx = 0; 1986 it_idx = 0; 1987 segs_await = 0; 1988 vq->async_pkts_inflight_n += n_pkts; 1989 1990 if (unlikely(n_pkts < pkt_burst_idx)) { 1991 /* 1992 * log error packets number here and do actual 1993 * error processing when applications poll 1994 * completion 1995 */ 1996 pkt_err = pkt_burst_idx - n_pkts; 1997 pkt_burst_idx = 0; 1998 break; 1999 } 2000 2001 pkt_burst_idx = 0; 2002 } 2003 } while (pkt_idx < count); 2004 2005 if (pkt_burst_idx) { 2006 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 2007 vq->async_pkts_inflight_n += n_pkts; 2008 2009 if (unlikely(n_pkts < pkt_burst_idx)) 2010 pkt_err = pkt_burst_idx - n_pkts; 2011 } 2012 2013 do_data_copy_enqueue(dev, vq); 2014 2015 if (unlikely(pkt_err)) 2016 dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err, 2017 &pkt_idx, &num_async_pkts, &num_done_pkts); 2018 vq->async_pkts_idx += num_async_pkts; 2019 *comp_count = num_done_pkts; 2020 2021 if (likely(vq->shadow_used_idx)) { 2022 vhost_flush_enqueue_shadow_packed(dev, vq); 2023 vhost_vring_call_packed(dev, vq); 2024 } 2025 2026 return pkt_idx; 2027 } 2028 2029 static __rte_always_inline void 2030 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 2031 { 2032 uint16_t nr_left = n_descs; 2033 uint16_t nr_copy; 2034 uint16_t to, from; 2035 2036 do { 2037 from = vq->last_async_desc_idx_split & (vq->size - 1); 2038 nr_copy = nr_left + from <= vq->size ? 
nr_left : vq->size - from; 2039 to = vq->last_used_idx & (vq->size - 1); 2040 2041 if (to + nr_copy <= vq->size) { 2042 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2043 nr_copy * sizeof(struct vring_used_elem)); 2044 } else { 2045 uint16_t size = vq->size - to; 2046 2047 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 2048 size * sizeof(struct vring_used_elem)); 2049 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size], 2050 (nr_copy - size) * sizeof(struct vring_used_elem)); 2051 } 2052 2053 vq->last_async_desc_idx_split += nr_copy; 2054 vq->last_used_idx += nr_copy; 2055 nr_left -= nr_copy; 2056 } while (nr_left > 0); 2057 } 2058 2059 static __rte_always_inline void 2060 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 2061 uint16_t n_buffers) 2062 { 2063 uint16_t nr_left = n_buffers; 2064 uint16_t from, to; 2065 2066 do { 2067 from = vq->last_async_buffer_idx_packed % vq->size; 2068 to = (from + nr_left) % vq->size; 2069 if (to > from) { 2070 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from); 2071 vq->last_async_buffer_idx_packed += nr_left; 2072 nr_left = 0; 2073 } else { 2074 vhost_update_used_packed(vq, vq->async_buffers_packed + from, 2075 vq->size - from); 2076 vq->last_async_buffer_idx_packed += vq->size - from; 2077 nr_left -= vq->size - from; 2078 } 2079 } while (nr_left > 0); 2080 } 2081 2082 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2083 struct rte_mbuf **pkts, uint16_t count) 2084 { 2085 struct virtio_net *dev = get_device(vid); 2086 struct vhost_virtqueue *vq; 2087 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0; 2088 uint16_t start_idx, pkts_idx, vq_size; 2089 struct async_inflight_info *pkts_info; 2090 uint16_t from, i; 2091 2092 if (!dev) 2093 return 0; 2094 2095 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2096 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2097 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2098 dev->vid, __func__, queue_id); 2099 return 0; 2100 } 2101 2102 vq = dev->virtqueue[queue_id]; 2103 2104 if (unlikely(!vq->async_registered)) { 2105 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2106 dev->vid, __func__, queue_id); 2107 return 0; 2108 } 2109 2110 rte_spinlock_lock(&vq->access_lock); 2111 2112 pkts_idx = vq->async_pkts_idx % vq->size; 2113 pkts_info = vq->async_pkts_info; 2114 vq_size = vq->size; 2115 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx, 2116 vq_size, vq->async_pkts_inflight_n); 2117 2118 if (count > vq->async_last_pkts_n) 2119 n_pkts_cpl = vq->async_ops.check_completed_copies(vid, 2120 queue_id, 0, count - vq->async_last_pkts_n); 2121 n_pkts_cpl += vq->async_last_pkts_n; 2122 2123 n_pkts_put = RTE_MIN(count, n_pkts_cpl); 2124 if (unlikely(n_pkts_put == 0)) { 2125 vq->async_last_pkts_n = n_pkts_cpl; 2126 goto done; 2127 } 2128 2129 if (vq_is_packed(dev)) { 2130 for (i = 0; i < n_pkts_put; i++) { 2131 from = (start_idx + i) & (vq_size - 1); 2132 n_buffers += pkts_info[from].nr_buffers; 2133 pkts[i] = pkts_info[from].mbuf; 2134 } 2135 } else { 2136 for (i = 0; i < n_pkts_put; i++) { 2137 from = (start_idx + i) & (vq_size - 1); 2138 n_descs += pkts_info[from].descs; 2139 pkts[i] = pkts_info[from].mbuf; 2140 } 2141 } 2142 2143 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put; 2144 vq->async_pkts_inflight_n -= n_pkts_put; 2145 2146 if (likely(vq->enabled && vq->access_ok)) { 2147 if (vq_is_packed(dev)) { 2148 
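			/*
			 * Packed ring: write the completed buffers back to the
			 * descriptor ring, then notify the guest.
			 */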
write_back_completed_descs_packed(vq, n_buffers); 2149 2150 vhost_vring_call_packed(dev, vq); 2151 } else { 2152 write_back_completed_descs_split(vq, n_descs); 2153 2154 __atomic_add_fetch(&vq->used->idx, n_descs, 2155 __ATOMIC_RELEASE); 2156 vhost_vring_call_split(dev, vq); 2157 } 2158 } else { 2159 if (vq_is_packed(dev)) 2160 vq->last_async_buffer_idx_packed += n_buffers; 2161 else 2162 vq->last_async_desc_idx_split += n_descs; 2163 } 2164 2165 done: 2166 rte_spinlock_unlock(&vq->access_lock); 2167 2168 return n_pkts_put; 2169 } 2170 2171 static __rte_always_inline uint32_t 2172 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2173 struct rte_mbuf **pkts, uint32_t count, 2174 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2175 { 2176 struct vhost_virtqueue *vq; 2177 uint32_t nb_tx = 0; 2178 2179 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2180 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2181 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2182 dev->vid, __func__, queue_id); 2183 return 0; 2184 } 2185 2186 vq = dev->virtqueue[queue_id]; 2187 2188 rte_spinlock_lock(&vq->access_lock); 2189 2190 if (unlikely(!vq->enabled || !vq->async_registered)) 2191 goto out_access_unlock; 2192 2193 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2194 vhost_user_iotlb_rd_lock(vq); 2195 2196 if (unlikely(!vq->access_ok)) 2197 if (unlikely(vring_translate(dev, vq) < 0)) 2198 goto out; 2199 2200 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2201 if (count == 0) 2202 goto out; 2203 2204 if (vq_is_packed(dev)) 2205 nb_tx = virtio_dev_rx_async_submit_packed(dev, 2206 vq, queue_id, pkts, count, comp_pkts, 2207 comp_count); 2208 else 2209 nb_tx = virtio_dev_rx_async_submit_split(dev, 2210 vq, queue_id, pkts, count, comp_pkts, 2211 comp_count); 2212 2213 out: 2214 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2215 vhost_user_iotlb_rd_unlock(vq); 2216 2217 out_access_unlock: 2218 rte_spinlock_unlock(&vq->access_lock); 2219 2220 return nb_tx; 2221 } 2222 2223 uint16_t 2224 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2225 struct rte_mbuf **pkts, uint16_t count, 2226 struct rte_mbuf **comp_pkts, uint32_t *comp_count) 2227 { 2228 struct virtio_net *dev = get_device(vid); 2229 2230 *comp_count = 0; 2231 if (!dev) 2232 return 0; 2233 2234 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2235 VHOST_LOG_DATA(ERR, 2236 "(%d) %s: built-in vhost net backend is disabled.\n", 2237 dev->vid, __func__); 2238 return 0; 2239 } 2240 2241 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts, 2242 comp_count); 2243 } 2244 2245 static inline bool 2246 virtio_net_with_host_offload(struct virtio_net *dev) 2247 { 2248 if (dev->features & 2249 ((1ULL << VIRTIO_NET_F_CSUM) | 2250 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2251 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2252 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2253 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2254 return true; 2255 2256 return false; 2257 } 2258 2259 static void 2260 parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) 2261 { 2262 struct rte_ipv4_hdr *ipv4_hdr; 2263 struct rte_ipv6_hdr *ipv6_hdr; 2264 void *l3_hdr = NULL; 2265 struct rte_ether_hdr *eth_hdr; 2266 uint16_t ethertype; 2267 2268 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2269 2270 m->l2_len = sizeof(struct rte_ether_hdr); 2271 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2272 2273 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2274 struct rte_vlan_hdr *vlan_hdr = 2275 
(struct rte_vlan_hdr *)(eth_hdr + 1); 2276 2277 m->l2_len += sizeof(struct rte_vlan_hdr); 2278 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2279 } 2280 2281 l3_hdr = (char *)eth_hdr + m->l2_len; 2282 2283 switch (ethertype) { 2284 case RTE_ETHER_TYPE_IPV4: 2285 ipv4_hdr = l3_hdr; 2286 *l4_proto = ipv4_hdr->next_proto_id; 2287 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2288 *l4_hdr = (char *)l3_hdr + m->l3_len; 2289 m->ol_flags |= PKT_TX_IPV4; 2290 break; 2291 case RTE_ETHER_TYPE_IPV6: 2292 ipv6_hdr = l3_hdr; 2293 *l4_proto = ipv6_hdr->proto; 2294 m->l3_len = sizeof(struct rte_ipv6_hdr); 2295 *l4_hdr = (char *)l3_hdr + m->l3_len; 2296 m->ol_flags |= PKT_TX_IPV6; 2297 break; 2298 default: 2299 m->l3_len = 0; 2300 *l4_proto = 0; 2301 *l4_hdr = NULL; 2302 break; 2303 } 2304 } 2305 2306 static __rte_always_inline void 2307 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m) 2308 { 2309 uint16_t l4_proto = 0; 2310 void *l4_hdr = NULL; 2311 struct rte_tcp_hdr *tcp_hdr = NULL; 2312 2313 parse_ethernet(m, &l4_proto, &l4_hdr); 2314 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2315 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2316 switch (hdr->csum_offset) { 2317 case (offsetof(struct rte_tcp_hdr, cksum)): 2318 if (l4_proto == IPPROTO_TCP) 2319 m->ol_flags |= PKT_TX_TCP_CKSUM; 2320 break; 2321 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2322 if (l4_proto == IPPROTO_UDP) 2323 m->ol_flags |= PKT_TX_UDP_CKSUM; 2324 break; 2325 case (offsetof(struct rte_sctp_hdr, cksum)): 2326 if (l4_proto == IPPROTO_SCTP) 2327 m->ol_flags |= PKT_TX_SCTP_CKSUM; 2328 break; 2329 default: 2330 break; 2331 } 2332 } 2333 } 2334 2335 if (l4_hdr && hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2336 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2337 case VIRTIO_NET_HDR_GSO_TCPV4: 2338 case VIRTIO_NET_HDR_GSO_TCPV6: 2339 tcp_hdr = l4_hdr; 2340 m->ol_flags |= PKT_TX_TCP_SEG; 2341 m->tso_segsz = hdr->gso_size; 2342 m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; 2343 break; 2344 case VIRTIO_NET_HDR_GSO_UDP: 2345 m->ol_flags |= PKT_TX_UDP_SEG; 2346 m->tso_segsz = hdr->gso_size; 2347 m->l4_len = sizeof(struct rte_udp_hdr); 2348 break; 2349 default: 2350 VHOST_LOG_DATA(WARNING, 2351 "unsupported gso type %u.\n", hdr->gso_type); 2352 break; 2353 } 2354 } 2355 } 2356 2357 static __rte_always_inline void 2358 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m, 2359 bool legacy_ol_flags) 2360 { 2361 struct rte_net_hdr_lens hdr_lens; 2362 int l4_supported = 0; 2363 uint32_t ptype; 2364 2365 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2366 return; 2367 2368 if (legacy_ol_flags) { 2369 vhost_dequeue_offload_legacy(hdr, m); 2370 return; 2371 } 2372 2373 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; 2374 2375 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2376 m->packet_type = ptype; 2377 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2378 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2379 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2380 l4_supported = 1; 2381 2382 /* According to Virtio 1.1 spec, the device only needs to look at 2383 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2384 * This differs from the processing incoming packets path where the 2385 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2386 * device. 2387 * 2388 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2389 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2390 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 
2391 * 2392 * 5.1.6.2.2 Device Requirements: Packet Transmission 2393 * The device MUST ignore flag bits that it does not recognize. 2394 */ 2395 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2396 uint32_t hdrlen; 2397 2398 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2399 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2400 m->ol_flags |= PKT_RX_L4_CKSUM_NONE; 2401 } else { 2402 /* Unknown proto or tunnel, do sw cksum. We can assume 2403 * the cksum field is in the first segment since the 2404 * buffers we provided to the host are large enough. 2405 * In case of SCTP, this will be wrong since it's a CRC 2406 * but there's nothing we can do. 2407 */ 2408 uint16_t csum = 0, off; 2409 2410 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2411 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2412 return; 2413 if (likely(csum != 0xffff)) 2414 csum = ~csum; 2415 off = hdr->csum_offset + hdr->csum_start; 2416 if (rte_pktmbuf_data_len(m) >= off + 1) 2417 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2418 } 2419 } 2420 2421 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2422 if (hdr->gso_size == 0) 2423 return; 2424 2425 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2426 case VIRTIO_NET_HDR_GSO_TCPV4: 2427 case VIRTIO_NET_HDR_GSO_TCPV6: 2428 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2429 break; 2430 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2431 m->tso_segsz = hdr->gso_size; 2432 break; 2433 case VIRTIO_NET_HDR_GSO_UDP: 2434 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2435 break; 2436 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; 2437 m->tso_segsz = hdr->gso_size; 2438 break; 2439 default: 2440 break; 2441 } 2442 } 2443 } 2444 2445 static __rte_noinline void 2446 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2447 struct buf_vector *buf_vec) 2448 { 2449 uint64_t len; 2450 uint64_t remain = sizeof(struct virtio_net_hdr); 2451 uint64_t src; 2452 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2453 2454 while (remain) { 2455 len = RTE_MIN(remain, buf_vec->buf_len); 2456 src = buf_vec->buf_addr; 2457 rte_memcpy((void *)(uintptr_t)dst, 2458 (void *)(uintptr_t)src, len); 2459 2460 remain -= len; 2461 dst += len; 2462 buf_vec++; 2463 } 2464 } 2465 2466 static __rte_always_inline int 2467 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2468 struct buf_vector *buf_vec, uint16_t nr_vec, 2469 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2470 bool legacy_ol_flags) 2471 { 2472 uint32_t buf_avail, buf_offset; 2473 uint64_t buf_addr, buf_len; 2474 uint32_t mbuf_avail, mbuf_offset; 2475 uint32_t cpy_len; 2476 struct rte_mbuf *cur = m, *prev = m; 2477 struct virtio_net_hdr tmp_hdr; 2478 struct virtio_net_hdr *hdr = NULL; 2479 /* A counter to avoid desc dead loop chain */ 2480 uint16_t vec_idx = 0; 2481 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 2482 int error = 0; 2483 2484 buf_addr = buf_vec[vec_idx].buf_addr; 2485 buf_len = buf_vec[vec_idx].buf_len; 2486 2487 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 2488 error = -1; 2489 goto out; 2490 } 2491 2492 if (virtio_net_with_host_offload(dev)) { 2493 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2494 /* 2495 * No luck, the virtio-net header doesn't fit 2496 * in a contiguous virtual area. 
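			 * The guest placed it in a descriptor shorter than the
			 * header itself, so gather it into a local copy,
			 * descriptor by descriptor.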
			 */
			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
			hdr = &tmp_hdr;
		} else {
			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
		}
	}

	/*
	 * A virtio driver normally uses at least 2 desc buffers
	 * for Tx: the first for storing the header, and others
	 * for storing the data.
	 */
	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;
		vec_idx++;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
	} else if (buf_len == dev->vhost_hlen) {
		if (unlikely(++vec_idx >= nr_vec))
			goto out;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_len = buf_vec[vec_idx].buf_len;

		buf_offset = 0;
		buf_avail = buf_len;
	} else {
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
	}

	PRINT_PACKET(dev,
			(uintptr_t)(buf_addr + buf_offset),
			(uint32_t)buf_avail, 0);

	mbuf_offset = 0;
	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
	while (1) {
		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		if (likely(cpy_len > MAX_BATCH_LEN ||
					vq->batch_copy_nb_elems >= vq->size ||
					(hdr && cur == m))) {
			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
						mbuf_offset),
					(void *)((uintptr_t)(buf_addr +
						buf_offset)), cpy_len);
		} else {
			batch_copy[vq->batch_copy_nb_elems].dst =
				rte_pktmbuf_mtod_offset(cur, void *,
						mbuf_offset);
			batch_copy[vq->batch_copy_nb_elems].src =
				(void *)((uintptr_t)(buf_addr + buf_offset));
			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
			vq->batch_copy_nb_elems++;
		}

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		buf_avail -= cpy_len;
		buf_offset += cpy_len;

		/* This buf has reached its end, get the next one */
		if (buf_avail == 0) {
			if (++vec_idx >= nr_vec)
				break;

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_len = buf_vec[vec_idx].buf_len;

			buf_offset = 0;
			buf_avail = buf_len;

			PRINT_PACKET(dev, (uintptr_t)buf_addr,
					(uint32_t)buf_avail, 0);
		}

		/*
		 * This mbuf has reached its end, get a new one
		 * to hold more data.
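		 * The filled segment's data_len is then finalized, the new
		 * segment is linked into the chain, and the head mbuf's
		 * nb_segs and pkt_len are updated accordingly.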
2578 */ 2579 if (mbuf_avail == 0) { 2580 cur = rte_pktmbuf_alloc(mbuf_pool); 2581 if (unlikely(cur == NULL)) { 2582 VHOST_LOG_DATA(ERR, "Failed to " 2583 "allocate memory for mbuf.\n"); 2584 error = -1; 2585 goto out; 2586 } 2587 2588 prev->next = cur; 2589 prev->data_len = mbuf_offset; 2590 m->nb_segs += 1; 2591 m->pkt_len += mbuf_offset; 2592 prev = cur; 2593 2594 mbuf_offset = 0; 2595 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2596 } 2597 } 2598 2599 prev->data_len = mbuf_offset; 2600 m->pkt_len += mbuf_offset; 2601 2602 if (hdr) 2603 vhost_dequeue_offload(hdr, m, legacy_ol_flags); 2604 2605 out: 2606 2607 return error; 2608 } 2609 2610 static void 2611 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2612 { 2613 rte_free(opaque); 2614 } 2615 2616 static int 2617 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) 2618 { 2619 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2620 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2621 uint16_t buf_len; 2622 rte_iova_t iova; 2623 void *buf; 2624 2625 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2626 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2627 2628 if (unlikely(total_len > UINT16_MAX)) 2629 return -ENOSPC; 2630 2631 buf_len = total_len; 2632 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2633 if (unlikely(buf == NULL)) 2634 return -ENOMEM; 2635 2636 /* Initialize shinfo */ 2637 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2638 virtio_dev_extbuf_free, buf); 2639 if (unlikely(shinfo == NULL)) { 2640 rte_free(buf); 2641 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); 2642 return -1; 2643 } 2644 2645 iova = rte_malloc_virt2iova(buf); 2646 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2647 rte_pktmbuf_reset_headroom(pkt); 2648 2649 return 0; 2650 } 2651 2652 static __rte_always_inline int 2653 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2654 uint32_t data_len) 2655 { 2656 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2657 return 0; 2658 2659 /* attach an external buffer if supported */ 2660 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len)) 2661 return 0; 2662 2663 /* check if chained buffers are allowed */ 2664 if (!dev->linearbuf) 2665 return 0; 2666 2667 return -1; 2668 } 2669 2670 /* 2671 * Allocate a host supported pktmbuf. 2672 */ 2673 static __rte_always_inline struct rte_mbuf * 2674 virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp, 2675 uint32_t data_len) 2676 { 2677 struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp); 2678 2679 if (unlikely(pkt == NULL)) { 2680 VHOST_LOG_DATA(ERR, 2681 "Failed to allocate memory for mbuf.\n"); 2682 return NULL; 2683 } 2684 2685 if (virtio_dev_pktmbuf_prep(dev, pkt, data_len)) { 2686 /* Data doesn't fit into the buffer and the host supports 2687 * only linear buffers 2688 */ 2689 rte_pktmbuf_free(pkt); 2690 return NULL; 2691 } 2692 2693 return pkt; 2694 } 2695 2696 __rte_always_inline 2697 static uint16_t 2698 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2699 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2700 bool legacy_ol_flags) 2701 { 2702 uint16_t i; 2703 uint16_t free_entries; 2704 uint16_t dropped = 0; 2705 static bool allocerr_warned; 2706 2707 /* 2708 * The ordering between avail index and 2709 * desc reads needs to be enforced. 
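	 * The acquire load of avail->idx below pairs with the store performed
	 * by the guest driver when it publishes new descriptors, so the
	 * descriptor contents read afterwards are not stale.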
2710 */ 2711 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2712 vq->last_avail_idx; 2713 if (free_entries == 0) 2714 return 0; 2715 2716 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2717 2718 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2719 2720 count = RTE_MIN(count, MAX_PKT_BURST); 2721 count = RTE_MIN(count, free_entries); 2722 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", 2723 dev->vid, count); 2724 2725 for (i = 0; i < count; i++) { 2726 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2727 uint16_t head_idx; 2728 uint32_t buf_len; 2729 uint16_t nr_vec = 0; 2730 int err; 2731 2732 if (unlikely(fill_vec_buf_split(dev, vq, 2733 vq->last_avail_idx + i, 2734 &nr_vec, buf_vec, 2735 &head_idx, &buf_len, 2736 VHOST_ACCESS_RO) < 0)) 2737 break; 2738 2739 update_shadow_used_ring_split(vq, head_idx, 0); 2740 2741 pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); 2742 if (unlikely(pkts[i] == NULL)) { 2743 /* 2744 * mbuf allocation fails for jumbo packets when external 2745 * buffer allocation is not allowed and linear buffer 2746 * is required. Drop this packet. 2747 */ 2748 if (!allocerr_warned) { 2749 VHOST_LOG_DATA(ERR, 2750 "Failed mbuf alloc of size %d from %s on %s.\n", 2751 buf_len, mbuf_pool->name, dev->ifname); 2752 allocerr_warned = true; 2753 } 2754 dropped += 1; 2755 i++; 2756 break; 2757 } 2758 2759 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2760 mbuf_pool, legacy_ol_flags); 2761 if (unlikely(err)) { 2762 rte_pktmbuf_free(pkts[i]); 2763 if (!allocerr_warned) { 2764 VHOST_LOG_DATA(ERR, 2765 "Failed to copy desc to mbuf on %s.\n", 2766 dev->ifname); 2767 allocerr_warned = true; 2768 } 2769 dropped += 1; 2770 i++; 2771 break; 2772 } 2773 } 2774 2775 vq->last_avail_idx += i; 2776 2777 do_data_copy_dequeue(vq); 2778 if (unlikely(i < count)) 2779 vq->shadow_used_idx = i; 2780 if (likely(vq->shadow_used_idx)) { 2781 flush_shadow_used_ring_split(dev, vq); 2782 vhost_vring_call_split(dev, vq); 2783 } 2784 2785 return (i - dropped); 2786 } 2787 2788 __rte_noinline 2789 static uint16_t 2790 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2791 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2792 struct rte_mbuf **pkts, uint16_t count) 2793 { 2794 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2795 } 2796 2797 __rte_noinline 2798 static uint16_t 2799 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2800 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2801 struct rte_mbuf **pkts, uint16_t count) 2802 { 2803 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2804 } 2805 2806 static __rte_always_inline int 2807 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2808 struct vhost_virtqueue *vq, 2809 struct rte_mbuf **pkts, 2810 uint16_t avail_idx, 2811 uintptr_t *desc_addrs, 2812 uint16_t *ids) 2813 { 2814 bool wrap = vq->avail_wrap_counter; 2815 struct vring_packed_desc *descs = vq->desc_packed; 2816 uint64_t lens[PACKED_BATCH_SIZE]; 2817 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2818 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2819 uint16_t flags, i; 2820 2821 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2822 return -1; 2823 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2824 return -1; 2825 2826 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2827 flags = descs[avail_idx + i].flags; 2828 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2829 (wrap == !!(flags & VRING_DESC_F_USED)) || 2830 (flags 
& PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2831 return -1; 2832 } 2833 2834 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 2835 2836 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2837 lens[i] = descs[avail_idx + i].len; 2838 2839 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2840 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 2841 descs[avail_idx + i].addr, 2842 &lens[i], VHOST_ACCESS_RW); 2843 } 2844 2845 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2846 if (unlikely(!desc_addrs[i])) 2847 return -1; 2848 if (unlikely((lens[i] != descs[avail_idx + i].len))) 2849 return -1; 2850 } 2851 2852 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2853 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 2854 goto err; 2855 } 2856 2857 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2858 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 2859 2860 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2861 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 2862 goto err; 2863 } 2864 2865 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2866 pkts[i]->pkt_len = lens[i] - buf_offset; 2867 pkts[i]->data_len = pkts[i]->pkt_len; 2868 ids[i] = descs[avail_idx + i].id; 2869 } 2870 2871 return 0; 2872 2873 err: 2874 return -1; 2875 } 2876 2877 static __rte_always_inline int 2878 virtio_dev_tx_batch_packed(struct virtio_net *dev, 2879 struct vhost_virtqueue *vq, 2880 struct rte_mbuf **pkts, 2881 bool legacy_ol_flags) 2882 { 2883 uint16_t avail_idx = vq->last_avail_idx; 2884 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2885 struct virtio_net_hdr *hdr; 2886 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 2887 uint16_t ids[PACKED_BATCH_SIZE]; 2888 uint16_t i; 2889 2890 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 2891 desc_addrs, ids)) 2892 return -1; 2893 2894 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2895 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 2896 2897 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2898 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 2899 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 2900 pkts[i]->pkt_len); 2901 2902 if (virtio_net_with_host_offload(dev)) { 2903 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2904 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 2905 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags); 2906 } 2907 } 2908 2909 if (virtio_net_is_inorder(dev)) 2910 vhost_shadow_dequeue_batch_packed_inorder(vq, 2911 ids[PACKED_BATCH_SIZE - 1]); 2912 else 2913 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 2914 2915 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 2916 2917 return 0; 2918 } 2919 2920 static __rte_always_inline int 2921 vhost_dequeue_single_packed(struct virtio_net *dev, 2922 struct vhost_virtqueue *vq, 2923 struct rte_mempool *mbuf_pool, 2924 struct rte_mbuf *pkts, 2925 uint16_t *buf_id, 2926 uint16_t *desc_count, 2927 bool legacy_ol_flags) 2928 { 2929 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2930 uint32_t buf_len; 2931 uint16_t nr_vec = 0; 2932 int err; 2933 static bool allocerr_warned; 2934 2935 if (unlikely(fill_vec_buf_packed(dev, vq, 2936 vq->last_avail_idx, desc_count, 2937 buf_vec, &nr_vec, 2938 buf_id, &buf_len, 2939 VHOST_ACCESS_RO) < 0)) 2940 return -1; 2941 2942 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 2943 if (!allocerr_warned) { 2944 VHOST_LOG_DATA(ERR, 2945 "Failed mbuf alloc of size %d from %s on %s.\n", 2946 buf_len, mbuf_pool->name, dev->ifname); 2947 allocerr_warned = true; 2948 } 2949 return -1; 2950 } 2951 2952 err = 
copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 2953 mbuf_pool, legacy_ol_flags); 2954 if (unlikely(err)) { 2955 if (!allocerr_warned) { 2956 VHOST_LOG_DATA(ERR, 2957 "Failed to copy desc to mbuf on %s.\n", 2958 dev->ifname); 2959 allocerr_warned = true; 2960 } 2961 return -1; 2962 } 2963 2964 return 0; 2965 } 2966 2967 static __rte_always_inline int 2968 virtio_dev_tx_single_packed(struct virtio_net *dev, 2969 struct vhost_virtqueue *vq, 2970 struct rte_mempool *mbuf_pool, 2971 struct rte_mbuf *pkts, 2972 bool legacy_ol_flags) 2973 { 2974 2975 uint16_t buf_id, desc_count = 0; 2976 int ret; 2977 2978 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 2979 &desc_count, legacy_ol_flags); 2980 2981 if (likely(desc_count > 0)) { 2982 if (virtio_net_is_inorder(dev)) 2983 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 2984 desc_count); 2985 else 2986 vhost_shadow_dequeue_single_packed(vq, buf_id, 2987 desc_count); 2988 2989 vq_inc_last_avail_packed(vq, desc_count); 2990 } 2991 2992 return ret; 2993 } 2994 2995 __rte_always_inline 2996 static uint16_t 2997 virtio_dev_tx_packed(struct virtio_net *dev, 2998 struct vhost_virtqueue *__rte_restrict vq, 2999 struct rte_mempool *mbuf_pool, 3000 struct rte_mbuf **__rte_restrict pkts, 3001 uint32_t count, 3002 bool legacy_ol_flags) 3003 { 3004 uint32_t pkt_idx = 0; 3005 3006 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3007 return 0; 3008 3009 do { 3010 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3011 3012 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3013 if (!virtio_dev_tx_batch_packed(dev, vq, 3014 &pkts[pkt_idx], 3015 legacy_ol_flags)) { 3016 pkt_idx += PACKED_BATCH_SIZE; 3017 continue; 3018 } 3019 } 3020 3021 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3022 pkts[pkt_idx], 3023 legacy_ol_flags)) 3024 break; 3025 pkt_idx++; 3026 } while (pkt_idx < count); 3027 3028 if (pkt_idx != count) 3029 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3030 3031 if (vq->shadow_used_idx) { 3032 do_data_copy_dequeue(vq); 3033 3034 vhost_flush_dequeue_shadow_packed(dev, vq); 3035 vhost_vring_call_packed(dev, vq); 3036 } 3037 3038 return pkt_idx; 3039 } 3040 3041 __rte_noinline 3042 static uint16_t 3043 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3044 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3045 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3046 { 3047 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3048 } 3049 3050 __rte_noinline 3051 static uint16_t 3052 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3053 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3054 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3055 { 3056 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3057 } 3058 3059 uint16_t 3060 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3061 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3062 { 3063 struct virtio_net *dev; 3064 struct rte_mbuf *rarp_mbuf = NULL; 3065 struct vhost_virtqueue *vq; 3066 int16_t success = 1; 3067 3068 dev = get_device(vid); 3069 if (!dev) 3070 return 0; 3071 3072 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3073 VHOST_LOG_DATA(ERR, 3074 "(%d) %s: built-in vhost net backend is disabled.\n", 3075 dev->vid, __func__); 3076 return 0; 3077 } 3078 3079 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { 3080 VHOST_LOG_DATA(ERR, 3081 "(%d) %s: invalid virtqueue idx %d.\n", 3082 dev->vid, 
				__func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(!vq->enabled)) {
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so that it looks like the guest actually sent such a packet.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write when it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
		else
			count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
		else
			count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
	}

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL)) {
		/*
		 * Inject it to the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
		pkts[0] = rarp_mbuf;
		count += 1;
	}

	return count;
}
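
/*
 * Minimal usage sketch (illustrative only, not part of this file): a polling
 * datapath thread might pair rte_vhost_dequeue_burst() with an Ethernet TX
 * burst.  The vid, port_id and mbuf_pool variables, as well as the VIRTIO_TXQ
 * virtqueue index, are assumptions of the example and are not defined here.
 *
 *	struct rte_mbuf *pkts[MAX_PKT_BURST];
 *	uint16_t nb_rx, nb_tx;
 *
 *	nb_rx = rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
 *			pkts, MAX_PKT_BURST);
 *	if (nb_rx) {
 *		nb_tx = rte_eth_tx_burst(port_id, 0, pkts, nb_rx);
 *		while (unlikely(nb_tx < nb_rx))
 *			rte_pktmbuf_free(pkts[nb_tx++]);
 *	}
 */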