1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/virtio_net.h> 8 9 #include <rte_mbuf.h> 10 #include <rte_memcpy.h> 11 #include <rte_net.h> 12 #include <rte_ether.h> 13 #include <rte_ip.h> 14 #include <rte_vhost.h> 15 #include <rte_tcp.h> 16 #include <rte_udp.h> 17 #include <rte_sctp.h> 18 #include <rte_arp.h> 19 #include <rte_spinlock.h> 20 #include <rte_malloc.h> 21 #include <rte_vhost_async.h> 22 23 #include "iotlb.h" 24 #include "vhost.h" 25 26 #define MAX_BATCH_LEN 256 27 28 #define VHOST_ASYNC_BATCH_THRESHOLD 32 29 30 static __rte_always_inline bool 31 rxvq_is_mergeable(struct virtio_net *dev) 32 { 33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); 34 } 35 36 static __rte_always_inline bool 37 virtio_net_is_inorder(struct virtio_net *dev) 38 { 39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER); 40 } 41 42 static bool 43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) 44 { 45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; 46 } 47 48 static inline void 49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) 50 { 51 struct batch_copy_elem *elem = vq->batch_copy_elems; 52 uint16_t count = vq->batch_copy_nb_elems; 53 int i; 54 55 for (i = 0; i < count; i++) { 56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, 58 elem[i].len); 59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); 60 } 61 62 vq->batch_copy_nb_elems = 0; 63 } 64 65 static inline void 66 do_data_copy_dequeue(struct vhost_virtqueue *vq) 67 { 68 struct batch_copy_elem *elem = vq->batch_copy_elems; 69 uint16_t count = vq->batch_copy_nb_elems; 70 int i; 71 72 for (i = 0; i < count; i++) 73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); 74 75 vq->batch_copy_nb_elems = 0; 76 } 77 78 static __rte_always_inline void 79 do_flush_shadow_used_ring_split(struct virtio_net *dev, 80 struct vhost_virtqueue *vq, 81 uint16_t to, uint16_t from, uint16_t size) 82 { 83 rte_memcpy(&vq->used->ring[to], 84 &vq->shadow_used_split[from], 85 size * sizeof(struct vring_used_elem)); 86 vhost_log_cache_used_vring(dev, vq, 87 offsetof(struct vring_used, ring[to]), 88 size * sizeof(struct vring_used_elem)); 89 } 90 91 static __rte_always_inline void 92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 93 { 94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1); 95 96 if (used_idx + vq->shadow_used_idx <= vq->size) { 97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, 98 vq->shadow_used_idx); 99 } else { 100 uint16_t size; 101 102 /* update used ring interval [used_idx, vq->size] */ 103 size = vq->size - used_idx; 104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); 105 106 /* update the left half used ring interval [0, left_size] */ 107 do_flush_shadow_used_ring_split(dev, vq, 0, size, 108 vq->shadow_used_idx - size); 109 } 110 vq->last_used_idx += vq->shadow_used_idx; 111 112 vhost_log_cache_sync(dev, vq); 113 114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, 115 __ATOMIC_RELEASE); 116 vq->shadow_used_idx = 0; 117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), 118 sizeof(vq->used->idx)); 119 } 120 121 static __rte_always_inline void 122 update_shadow_used_ring_split(struct vhost_virtqueue *vq, 123 uint16_t desc_idx, uint32_t len) 124 { 125 uint16_t i = vq->shadow_used_idx++; 126 127 vq->shadow_used_split[i].id = desc_idx; 128 
vq->shadow_used_split[i].len = len; 129 } 130 131 static __rte_always_inline void 132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, 133 struct vhost_virtqueue *vq) 134 { 135 int i; 136 uint16_t used_idx = vq->last_used_idx; 137 uint16_t head_idx = vq->last_used_idx; 138 uint16_t head_flags = 0; 139 140 /* Split loop in two to save memory barriers */ 141 for (i = 0; i < vq->shadow_used_idx; i++) { 142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; 143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; 144 145 used_idx += vq->shadow_used_packed[i].count; 146 if (used_idx >= vq->size) 147 used_idx -= vq->size; 148 } 149 150 /* The ordering for storing desc flags needs to be enforced. */ 151 rte_atomic_thread_fence(__ATOMIC_RELEASE); 152 153 for (i = 0; i < vq->shadow_used_idx; i++) { 154 uint16_t flags; 155 156 if (vq->shadow_used_packed[i].len) 157 flags = VRING_DESC_F_WRITE; 158 else 159 flags = 0; 160 161 if (vq->used_wrap_counter) { 162 flags |= VRING_DESC_F_USED; 163 flags |= VRING_DESC_F_AVAIL; 164 } else { 165 flags &= ~VRING_DESC_F_USED; 166 flags &= ~VRING_DESC_F_AVAIL; 167 } 168 169 if (i > 0) { 170 vq->desc_packed[vq->last_used_idx].flags = flags; 171 172 vhost_log_cache_used_vring(dev, vq, 173 vq->last_used_idx * 174 sizeof(struct vring_packed_desc), 175 sizeof(struct vring_packed_desc)); 176 } else { 177 head_idx = vq->last_used_idx; 178 head_flags = flags; 179 } 180 181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); 182 } 183 184 vq->desc_packed[head_idx].flags = head_flags; 185 186 vhost_log_cache_used_vring(dev, vq, 187 head_idx * 188 sizeof(struct vring_packed_desc), 189 sizeof(struct vring_packed_desc)); 190 191 vq->shadow_used_idx = 0; 192 vhost_log_cache_sync(dev, vq); 193 } 194 195 static __rte_always_inline void 196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, 197 struct vhost_virtqueue *vq) 198 { 199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; 200 201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; 202 /* desc flags is the synchronization point for virtio packed vring */ 203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags, 204 used_elem->flags, __ATOMIC_RELEASE); 205 206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * 207 sizeof(struct vring_packed_desc), 208 sizeof(struct vring_packed_desc)); 209 vq->shadow_used_idx = 0; 210 vhost_log_cache_sync(dev, vq); 211 } 212 213 static __rte_always_inline void 214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev, 215 struct vhost_virtqueue *vq, 216 uint64_t *lens, 217 uint16_t *ids) 218 { 219 uint16_t i; 220 uint16_t flags; 221 uint16_t last_used_idx; 222 struct vring_packed_desc *desc_base; 223 224 last_used_idx = vq->last_used_idx; 225 desc_base = &vq->desc_packed[last_used_idx]; 226 227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); 228 229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 230 desc_base[i].id = ids[i]; 231 desc_base[i].len = lens[i]; 232 } 233 234 rte_atomic_thread_fence(__ATOMIC_RELEASE); 235 236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 237 desc_base[i].flags = flags; 238 } 239 240 vhost_log_cache_used_vring(dev, vq, last_used_idx * 241 sizeof(struct vring_packed_desc), 242 sizeof(struct vring_packed_desc) * 243 PACKED_BATCH_SIZE); 244 vhost_log_cache_sync(dev, vq); 245 246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 247 } 248 249 static __rte_always_inline void 250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue 
*vq, 251 uint16_t id) 252 { 253 vq->shadow_used_packed[0].id = id; 254 255 if (!vq->shadow_used_idx) { 256 vq->shadow_last_used_idx = vq->last_used_idx; 257 vq->shadow_used_packed[0].flags = 258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 259 vq->shadow_used_packed[0].len = 0; 260 vq->shadow_used_packed[0].count = 1; 261 vq->shadow_used_idx++; 262 } 263 264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 265 } 266 267 static __rte_always_inline void 268 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, 269 struct vhost_virtqueue *vq, 270 uint16_t *ids) 271 { 272 uint16_t flags; 273 uint16_t i; 274 uint16_t begin; 275 276 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); 277 278 if (!vq->shadow_used_idx) { 279 vq->shadow_last_used_idx = vq->last_used_idx; 280 vq->shadow_used_packed[0].id = ids[0]; 281 vq->shadow_used_packed[0].len = 0; 282 vq->shadow_used_packed[0].count = 1; 283 vq->shadow_used_packed[0].flags = flags; 284 vq->shadow_used_idx++; 285 begin = 1; 286 } else 287 begin = 0; 288 289 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { 290 vq->desc_packed[vq->last_used_idx + i].id = ids[i]; 291 vq->desc_packed[vq->last_used_idx + i].len = 0; 292 } 293 294 rte_atomic_thread_fence(__ATOMIC_RELEASE); 295 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) 296 vq->desc_packed[vq->last_used_idx + i].flags = flags; 297 298 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * 299 sizeof(struct vring_packed_desc), 300 sizeof(struct vring_packed_desc) * 301 PACKED_BATCH_SIZE); 302 vhost_log_cache_sync(dev, vq); 303 304 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); 305 } 306 307 static __rte_always_inline void 308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, 309 uint16_t buf_id, 310 uint16_t count) 311 { 312 uint16_t flags; 313 314 flags = vq->desc_packed[vq->last_used_idx].flags; 315 if (vq->used_wrap_counter) { 316 flags |= VRING_DESC_F_USED; 317 flags |= VRING_DESC_F_AVAIL; 318 } else { 319 flags &= ~VRING_DESC_F_USED; 320 flags &= ~VRING_DESC_F_AVAIL; 321 } 322 323 if (!vq->shadow_used_idx) { 324 vq->shadow_last_used_idx = vq->last_used_idx; 325 326 vq->shadow_used_packed[0].id = buf_id; 327 vq->shadow_used_packed[0].len = 0; 328 vq->shadow_used_packed[0].flags = flags; 329 vq->shadow_used_idx++; 330 } else { 331 vq->desc_packed[vq->last_used_idx].id = buf_id; 332 vq->desc_packed[vq->last_used_idx].len = 0; 333 vq->desc_packed[vq->last_used_idx].flags = flags; 334 } 335 336 vq_inc_last_used_packed(vq, count); 337 } 338 339 static __rte_always_inline void 340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, 341 uint16_t buf_id, 342 uint16_t count) 343 { 344 uint16_t flags; 345 346 vq->shadow_used_packed[0].id = buf_id; 347 348 flags = vq->desc_packed[vq->last_used_idx].flags; 349 if (vq->used_wrap_counter) { 350 flags |= VRING_DESC_F_USED; 351 flags |= VRING_DESC_F_AVAIL; 352 } else { 353 flags &= ~VRING_DESC_F_USED; 354 flags &= ~VRING_DESC_F_AVAIL; 355 } 356 357 if (!vq->shadow_used_idx) { 358 vq->shadow_last_used_idx = vq->last_used_idx; 359 vq->shadow_used_packed[0].len = 0; 360 vq->shadow_used_packed[0].flags = flags; 361 vq->shadow_used_idx++; 362 } 363 364 vq_inc_last_used_packed(vq, count); 365 } 366 367 static __rte_always_inline void 368 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq, 369 uint32_t *len, 370 uint16_t *id, 371 uint16_t *count, 372 uint16_t num_buffers) 373 { 374 uint16_t i; 375 376 for (i = 0; i < num_buffers; i++) { 377 /* enqueue shadow flush action aligned with batch num 
*/ 378 if (!vq->shadow_used_idx) 379 vq->shadow_aligned_idx = vq->last_used_idx & 380 PACKED_BATCH_MASK; 381 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; 382 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; 383 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; 384 vq->shadow_aligned_idx += count[i]; 385 vq->shadow_used_idx++; 386 } 387 } 388 389 static __rte_always_inline void 390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev, 391 struct vhost_virtqueue *vq, 392 uint32_t *len, 393 uint16_t *id, 394 uint16_t *count, 395 uint16_t num_buffers) 396 { 397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers); 398 399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { 400 do_data_copy_enqueue(dev, vq); 401 vhost_flush_enqueue_shadow_packed(dev, vq); 402 } 403 } 404 405 /* avoid write operation when necessary, to lessen cache issues */ 406 #define ASSIGN_UNLESS_EQUAL(var, val) do { \ 407 if ((var) != (val)) \ 408 (var) = (val); \ 409 } while (0) 410 411 static __rte_always_inline void 412 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) 413 { 414 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK; 415 416 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) 417 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM; 418 419 if (csum_l4) { 420 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 421 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; 422 423 switch (csum_l4) { 424 case RTE_MBUF_F_TX_TCP_CKSUM: 425 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr, 426 cksum)); 427 break; 428 case RTE_MBUF_F_TX_UDP_CKSUM: 429 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr, 430 dgram_cksum)); 431 break; 432 case RTE_MBUF_F_TX_SCTP_CKSUM: 433 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr, 434 cksum)); 435 break; 436 } 437 } else { 438 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0); 439 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0); 440 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0); 441 } 442 443 /* IP cksum verification cannot be bypassed, then calculate here */ 444 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) { 445 struct rte_ipv4_hdr *ipv4_hdr; 446 447 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, 448 m_buf->l2_len); 449 ipv4_hdr->hdr_checksum = 0; 450 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 451 } 452 453 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 454 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4) 455 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 456 else 457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 458 net_hdr->gso_size = m_buf->tso_segsz; 459 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len 460 + m_buf->l4_len; 461 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) { 462 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 463 net_hdr->gso_size = m_buf->tso_segsz; 464 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + 465 m_buf->l4_len; 466 } else { 467 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0); 468 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0); 469 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0); 470 } 471 } 472 473 static __rte_always_inline int 474 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 475 struct buf_vector *buf_vec, uint16_t *vec_idx, 476 uint64_t desc_iova, uint64_t desc_len, uint8_t perm) 477 { 478 uint16_t vec_id = *vec_idx; 479 480 while (desc_len) { 481 uint64_t desc_addr; 482 uint64_t desc_chunck_len = desc_len; 483 484 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 485 return -1; 486 487 desc_addr = vhost_iova_to_vva(dev, vq, 488 desc_iova, 489 &desc_chunck_len, 490 perm); 491 if 
(unlikely(!desc_addr)) 492 return -1; 493 494 rte_prefetch0((void *)(uintptr_t)desc_addr); 495 496 buf_vec[vec_id].buf_iova = desc_iova; 497 buf_vec[vec_id].buf_addr = desc_addr; 498 buf_vec[vec_id].buf_len = desc_chunck_len; 499 500 desc_len -= desc_chunck_len; 501 desc_iova += desc_chunck_len; 502 vec_id++; 503 } 504 *vec_idx = vec_id; 505 506 return 0; 507 } 508 509 static __rte_always_inline int 510 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 511 uint32_t avail_idx, uint16_t *vec_idx, 512 struct buf_vector *buf_vec, uint16_t *desc_chain_head, 513 uint32_t *desc_chain_len, uint8_t perm) 514 { 515 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; 516 uint16_t vec_id = *vec_idx; 517 uint32_t len = 0; 518 uint64_t dlen; 519 uint32_t nr_descs = vq->size; 520 uint32_t cnt = 0; 521 struct vring_desc *descs = vq->desc; 522 struct vring_desc *idesc = NULL; 523 524 if (unlikely(idx >= vq->size)) 525 return -1; 526 527 *desc_chain_head = idx; 528 529 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { 530 dlen = vq->desc[idx].len; 531 nr_descs = dlen / sizeof(struct vring_desc); 532 if (unlikely(nr_descs > vq->size)) 533 return -1; 534 535 descs = (struct vring_desc *)(uintptr_t) 536 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, 537 &dlen, 538 VHOST_ACCESS_RO); 539 if (unlikely(!descs)) 540 return -1; 541 542 if (unlikely(dlen < vq->desc[idx].len)) { 543 /* 544 * The indirect desc table is not contiguous 545 * in process VA space, we have to copy it. 546 */ 547 idesc = vhost_alloc_copy_ind_table(dev, vq, 548 vq->desc[idx].addr, vq->desc[idx].len); 549 if (unlikely(!idesc)) 550 return -1; 551 552 descs = idesc; 553 } 554 555 idx = 0; 556 } 557 558 while (1) { 559 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { 560 free_ind_table(idesc); 561 return -1; 562 } 563 564 dlen = descs[idx].len; 565 len += dlen; 566 567 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 568 descs[idx].addr, dlen, 569 perm))) { 570 free_ind_table(idesc); 571 return -1; 572 } 573 574 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) 575 break; 576 577 idx = descs[idx].next; 578 } 579 580 *desc_chain_len = len; 581 *vec_idx = vec_id; 582 583 if (unlikely(!!idesc)) 584 free_ind_table(idesc); 585 586 return 0; 587 } 588 589 /* 590 * Returns -1 on fail, 0 on success 591 */ 592 static inline int 593 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 594 uint32_t size, struct buf_vector *buf_vec, 595 uint16_t *num_buffers, uint16_t avail_head, 596 uint16_t *nr_vec) 597 { 598 uint16_t cur_idx; 599 uint16_t vec_idx = 0; 600 uint16_t max_tries, tries = 0; 601 602 uint16_t head_idx = 0; 603 uint32_t len = 0; 604 605 *num_buffers = 0; 606 cur_idx = vq->last_avail_idx; 607 608 if (rxvq_is_mergeable(dev)) 609 max_tries = vq->size - 1; 610 else 611 max_tries = 1; 612 613 while (size > 0) { 614 if (unlikely(cur_idx == avail_head)) 615 return -1; 616 /* 617 * if we tried all available ring items, and still 618 * can't get enough buf, it means something abnormal 619 * happened. 
620 */ 621 if (unlikely(++tries > max_tries)) 622 return -1; 623 624 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, 625 &vec_idx, buf_vec, 626 &head_idx, &len, 627 VHOST_ACCESS_RW) < 0)) 628 return -1; 629 len = RTE_MIN(len, size); 630 update_shadow_used_ring_split(vq, head_idx, len); 631 size -= len; 632 633 cur_idx++; 634 *num_buffers += 1; 635 } 636 637 *nr_vec = vec_idx; 638 639 return 0; 640 } 641 642 static __rte_always_inline int 643 fill_vec_buf_packed_indirect(struct virtio_net *dev, 644 struct vhost_virtqueue *vq, 645 struct vring_packed_desc *desc, uint16_t *vec_idx, 646 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) 647 { 648 uint16_t i; 649 uint32_t nr_descs; 650 uint16_t vec_id = *vec_idx; 651 uint64_t dlen; 652 struct vring_packed_desc *descs, *idescs = NULL; 653 654 dlen = desc->len; 655 descs = (struct vring_packed_desc *)(uintptr_t) 656 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); 657 if (unlikely(!descs)) 658 return -1; 659 660 if (unlikely(dlen < desc->len)) { 661 /* 662 * The indirect desc table is not contiguous 663 * in process VA space, we have to copy it. 664 */ 665 idescs = vhost_alloc_copy_ind_table(dev, 666 vq, desc->addr, desc->len); 667 if (unlikely(!idescs)) 668 return -1; 669 670 descs = idescs; 671 } 672 673 nr_descs = desc->len / sizeof(struct vring_packed_desc); 674 if (unlikely(nr_descs >= vq->size)) { 675 free_ind_table(idescs); 676 return -1; 677 } 678 679 for (i = 0; i < nr_descs; i++) { 680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) { 681 free_ind_table(idescs); 682 return -1; 683 } 684 685 dlen = descs[i].len; 686 *len += dlen; 687 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 688 descs[i].addr, dlen, 689 perm))) 690 return -1; 691 } 692 *vec_idx = vec_id; 693 694 if (unlikely(!!idescs)) 695 free_ind_table(idescs); 696 697 return 0; 698 } 699 700 static __rte_always_inline int 701 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 702 uint16_t avail_idx, uint16_t *desc_count, 703 struct buf_vector *buf_vec, uint16_t *vec_idx, 704 uint16_t *buf_id, uint32_t *len, uint8_t perm) 705 { 706 bool wrap_counter = vq->avail_wrap_counter; 707 struct vring_packed_desc *descs = vq->desc_packed; 708 uint16_t vec_id = *vec_idx; 709 uint64_t dlen; 710 711 if (avail_idx < vq->last_avail_idx) 712 wrap_counter ^= 1; 713 714 /* 715 * Perform a load-acquire barrier in desc_is_avail to 716 * enforce the ordering between desc flags and desc 717 * content. 
718 */ 719 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) 720 return -1; 721 722 *desc_count = 0; 723 *len = 0; 724 725 while (1) { 726 if (unlikely(vec_id >= BUF_VECTOR_MAX)) 727 return -1; 728 729 if (unlikely(*desc_count >= vq->size)) 730 return -1; 731 732 *desc_count += 1; 733 *buf_id = descs[avail_idx].id; 734 735 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { 736 if (unlikely(fill_vec_buf_packed_indirect(dev, vq, 737 &descs[avail_idx], 738 &vec_id, buf_vec, 739 len, perm) < 0)) 740 return -1; 741 } else { 742 dlen = descs[avail_idx].len; 743 *len += dlen; 744 745 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, 746 descs[avail_idx].addr, 747 dlen, 748 perm))) 749 return -1; 750 } 751 752 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) 753 break; 754 755 if (++avail_idx >= vq->size) { 756 avail_idx -= vq->size; 757 wrap_counter ^= 1; 758 } 759 } 760 761 *vec_idx = vec_id; 762 763 return 0; 764 } 765 766 static __rte_noinline void 767 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 768 struct buf_vector *buf_vec, 769 struct virtio_net_hdr_mrg_rxbuf *hdr) 770 { 771 uint64_t len; 772 uint64_t remain = dev->vhost_hlen; 773 uint64_t src = (uint64_t)(uintptr_t)hdr, dst; 774 uint64_t iova = buf_vec->buf_iova; 775 776 while (remain) { 777 len = RTE_MIN(remain, 778 buf_vec->buf_len); 779 dst = buf_vec->buf_addr; 780 rte_memcpy((void *)(uintptr_t)dst, 781 (void *)(uintptr_t)src, 782 len); 783 784 PRINT_PACKET(dev, (uintptr_t)dst, 785 (uint32_t)len, 0); 786 vhost_log_cache_write_iova(dev, vq, 787 iova, len); 788 789 remain -= len; 790 iova += len; 791 src += len; 792 buf_vec++; 793 } 794 } 795 796 static __rte_always_inline int 797 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 798 struct rte_mbuf *m, struct buf_vector *buf_vec, 799 uint16_t nr_vec, uint16_t num_buffers) 800 { 801 uint32_t vec_idx = 0; 802 uint32_t mbuf_offset, mbuf_avail; 803 uint32_t buf_offset, buf_avail; 804 uint64_t buf_addr, buf_iova, buf_len; 805 uint32_t cpy_len; 806 uint64_t hdr_addr; 807 struct rte_mbuf *hdr_mbuf; 808 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 809 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 810 int error = 0; 811 812 if (unlikely(m == NULL)) { 813 error = -1; 814 goto out; 815 } 816 817 buf_addr = buf_vec[vec_idx].buf_addr; 818 buf_iova = buf_vec[vec_idx].buf_iova; 819 buf_len = buf_vec[vec_idx].buf_len; 820 821 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 822 error = -1; 823 goto out; 824 } 825 826 hdr_mbuf = m; 827 hdr_addr = buf_addr; 828 if (unlikely(buf_len < dev->vhost_hlen)) { 829 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 830 hdr = &tmp_hdr; 831 } else 832 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 833 834 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 835 dev->vid, num_buffers); 836 837 if (unlikely(buf_len < dev->vhost_hlen)) { 838 buf_offset = dev->vhost_hlen - buf_len; 839 vec_idx++; 840 buf_addr = buf_vec[vec_idx].buf_addr; 841 buf_iova = buf_vec[vec_idx].buf_iova; 842 buf_len = buf_vec[vec_idx].buf_len; 843 buf_avail = buf_len - buf_offset; 844 } else { 845 buf_offset = dev->vhost_hlen; 846 buf_avail = buf_len - dev->vhost_hlen; 847 } 848 849 mbuf_avail = rte_pktmbuf_data_len(m); 850 mbuf_offset = 0; 851 while (mbuf_avail != 0 || m->next != NULL) { 852 /* done with current buf, get the next one */ 853 if (buf_avail == 0) { 854 vec_idx++; 855 if (unlikely(vec_idx >= nr_vec)) { 856 error = -1; 857 goto out; 858 } 859 860 
buf_addr = buf_vec[vec_idx].buf_addr; 861 buf_iova = buf_vec[vec_idx].buf_iova; 862 buf_len = buf_vec[vec_idx].buf_len; 863 864 buf_offset = 0; 865 buf_avail = buf_len; 866 } 867 868 /* done with current mbuf, get the next one */ 869 if (mbuf_avail == 0) { 870 m = m->next; 871 872 mbuf_offset = 0; 873 mbuf_avail = rte_pktmbuf_data_len(m); 874 } 875 876 if (hdr_addr) { 877 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 878 if (rxvq_is_mergeable(dev)) 879 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 880 num_buffers); 881 882 if (unlikely(hdr == &tmp_hdr)) { 883 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 884 } else { 885 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 886 dev->vhost_hlen, 0); 887 vhost_log_cache_write_iova(dev, vq, 888 buf_vec[0].buf_iova, 889 dev->vhost_hlen); 890 } 891 892 hdr_addr = 0; 893 } 894 895 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 896 897 if (likely(cpy_len > MAX_BATCH_LEN || 898 vq->batch_copy_nb_elems >= vq->size)) { 899 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), 900 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), 901 cpy_len); 902 vhost_log_cache_write_iova(dev, vq, 903 buf_iova + buf_offset, 904 cpy_len); 905 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), 906 cpy_len, 0); 907 } else { 908 batch_copy[vq->batch_copy_nb_elems].dst = 909 (void *)((uintptr_t)(buf_addr + buf_offset)); 910 batch_copy[vq->batch_copy_nb_elems].src = 911 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); 912 batch_copy[vq->batch_copy_nb_elems].log_addr = 913 buf_iova + buf_offset; 914 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 915 vq->batch_copy_nb_elems++; 916 } 917 918 mbuf_avail -= cpy_len; 919 mbuf_offset += cpy_len; 920 buf_avail -= cpy_len; 921 buf_offset += cpy_len; 922 } 923 924 out: 925 926 return error; 927 } 928 929 static __rte_always_inline void 930 async_fill_vec(struct iovec *v, void *base, size_t len) 931 { 932 v->iov_base = base; 933 v->iov_len = len; 934 } 935 936 static __rte_always_inline void 937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count, 938 struct iovec *vec, unsigned long nr_seg) 939 { 940 it->offset = 0; 941 it->count = count; 942 943 if (count) { 944 it->iov = vec; 945 it->nr_segs = nr_seg; 946 } else { 947 it->iov = 0; 948 it->nr_segs = 0; 949 } 950 } 951 952 static __rte_always_inline void 953 async_fill_desc(struct rte_vhost_async_desc *desc, 954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst) 955 { 956 desc->src = src; 957 desc->dst = dst; 958 } 959 960 static __rte_always_inline int 961 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 962 struct rte_mbuf *m, struct buf_vector *buf_vec, 963 uint16_t nr_vec, uint16_t num_buffers, 964 struct iovec *src_iovec, struct iovec *dst_iovec, 965 struct rte_vhost_iov_iter *src_it, 966 struct rte_vhost_iov_iter *dst_it) 967 { 968 struct rte_mbuf *hdr_mbuf; 969 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; 970 uint64_t buf_addr, buf_iova; 971 uint64_t hdr_addr; 972 uint64_t mapped_len; 973 uint32_t vec_idx = 0; 974 uint32_t mbuf_offset, mbuf_avail; 975 uint32_t buf_offset, buf_avail; 976 uint32_t cpy_len, buf_len; 977 int error = 0; 978 979 uint32_t tlen = 0; 980 int tvec_idx = 0; 981 void *hpa; 982 983 if (unlikely(m == NULL)) { 984 error = -1; 985 goto out; 986 } 987 988 buf_addr = buf_vec[vec_idx].buf_addr; 989 buf_iova = buf_vec[vec_idx].buf_iova; 990 buf_len = buf_vec[vec_idx].buf_len; 991 992 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { 993 error = -1; 994 goto out; 995 } 996 997 hdr_mbuf = m; 998 hdr_addr = buf_addr; 999 if 
(unlikely(buf_len < dev->vhost_hlen)) { 1000 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); 1001 hdr = &tmp_hdr; 1002 } else 1003 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; 1004 1005 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", 1006 dev->vid, num_buffers); 1007 1008 if (unlikely(buf_len < dev->vhost_hlen)) { 1009 buf_offset = dev->vhost_hlen - buf_len; 1010 vec_idx++; 1011 buf_addr = buf_vec[vec_idx].buf_addr; 1012 buf_iova = buf_vec[vec_idx].buf_iova; 1013 buf_len = buf_vec[vec_idx].buf_len; 1014 buf_avail = buf_len - buf_offset; 1015 } else { 1016 buf_offset = dev->vhost_hlen; 1017 buf_avail = buf_len - dev->vhost_hlen; 1018 } 1019 1020 mbuf_avail = rte_pktmbuf_data_len(m); 1021 mbuf_offset = 0; 1022 1023 while (mbuf_avail != 0 || m->next != NULL) { 1024 /* done with current buf, get the next one */ 1025 if (buf_avail == 0) { 1026 vec_idx++; 1027 if (unlikely(vec_idx >= nr_vec)) { 1028 error = -1; 1029 goto out; 1030 } 1031 1032 buf_addr = buf_vec[vec_idx].buf_addr; 1033 buf_iova = buf_vec[vec_idx].buf_iova; 1034 buf_len = buf_vec[vec_idx].buf_len; 1035 1036 buf_offset = 0; 1037 buf_avail = buf_len; 1038 } 1039 1040 /* done with current mbuf, get the next one */ 1041 if (mbuf_avail == 0) { 1042 m = m->next; 1043 1044 mbuf_offset = 0; 1045 mbuf_avail = rte_pktmbuf_data_len(m); 1046 } 1047 1048 if (hdr_addr) { 1049 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); 1050 if (rxvq_is_mergeable(dev)) 1051 ASSIGN_UNLESS_EQUAL(hdr->num_buffers, 1052 num_buffers); 1053 1054 if (unlikely(hdr == &tmp_hdr)) { 1055 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr); 1056 } else { 1057 PRINT_PACKET(dev, (uintptr_t)hdr_addr, 1058 dev->vhost_hlen, 0); 1059 vhost_log_cache_write_iova(dev, vq, 1060 buf_vec[0].buf_iova, 1061 dev->vhost_hlen); 1062 } 1063 1064 hdr_addr = 0; 1065 } 1066 1067 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 1068 1069 while (unlikely(cpy_len)) { 1070 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, 1071 buf_iova + buf_offset, 1072 cpy_len, &mapped_len); 1073 if (unlikely(!hpa)) { 1074 VHOST_LOG_DATA(ERR, "(%d) %s: failed to get hpa.\n", 1075 dev->vid, __func__); 1076 error = -1; 1077 goto out; 1078 } 1079 1080 async_fill_vec(src_iovec + tvec_idx, 1081 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 1082 mbuf_offset), (size_t)mapped_len); 1083 async_fill_vec(dst_iovec + tvec_idx, 1084 hpa, (size_t)mapped_len); 1085 1086 tlen += (uint32_t)mapped_len; 1087 cpy_len -= (uint32_t)mapped_len; 1088 mbuf_avail -= (uint32_t)mapped_len; 1089 mbuf_offset += (uint32_t)mapped_len; 1090 buf_avail -= (uint32_t)mapped_len; 1091 buf_offset += (uint32_t)mapped_len; 1092 tvec_idx++; 1093 } 1094 } 1095 1096 async_fill_iter(src_it, tlen, src_iovec, tvec_idx); 1097 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx); 1098 out: 1099 return error; 1100 } 1101 1102 static __rte_always_inline int 1103 vhost_enqueue_single_packed(struct virtio_net *dev, 1104 struct vhost_virtqueue *vq, 1105 struct rte_mbuf *pkt, 1106 struct buf_vector *buf_vec, 1107 uint16_t *nr_descs) 1108 { 1109 uint16_t nr_vec = 0; 1110 uint16_t avail_idx = vq->last_avail_idx; 1111 uint16_t max_tries, tries = 0; 1112 uint16_t buf_id = 0; 1113 uint32_t len = 0; 1114 uint16_t desc_count; 1115 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1116 uint16_t num_buffers = 0; 1117 uint32_t buffer_len[vq->size]; 1118 uint16_t buffer_buf_id[vq->size]; 1119 uint16_t buffer_desc_count[vq->size]; 1120 1121 if (rxvq_is_mergeable(dev)) 1122 max_tries = vq->size - 1; 1123 else 1124 max_tries = 1; 1125 
1126 while (size > 0) { 1127 /* 1128 * if we tried all available ring items, and still 1129 * can't get enough buf, it means something abnormal 1130 * happened. 1131 */ 1132 if (unlikely(++tries > max_tries)) 1133 return -1; 1134 1135 if (unlikely(fill_vec_buf_packed(dev, vq, 1136 avail_idx, &desc_count, 1137 buf_vec, &nr_vec, 1138 &buf_id, &len, 1139 VHOST_ACCESS_RW) < 0)) 1140 return -1; 1141 1142 len = RTE_MIN(len, size); 1143 size -= len; 1144 1145 buffer_len[num_buffers] = len; 1146 buffer_buf_id[num_buffers] = buf_id; 1147 buffer_desc_count[num_buffers] = desc_count; 1148 num_buffers += 1; 1149 1150 *nr_descs += desc_count; 1151 avail_idx += desc_count; 1152 if (avail_idx >= vq->size) 1153 avail_idx -= vq->size; 1154 } 1155 1156 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0) 1157 return -1; 1158 1159 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, 1160 buffer_desc_count, num_buffers); 1161 1162 return 0; 1163 } 1164 1165 static __rte_noinline uint32_t 1166 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 1167 struct rte_mbuf **pkts, uint32_t count) 1168 { 1169 uint32_t pkt_idx = 0; 1170 uint16_t num_buffers; 1171 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1172 uint16_t avail_head; 1173 1174 /* 1175 * The ordering between avail index and 1176 * desc reads needs to be enforced. 1177 */ 1178 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1179 1180 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1181 1182 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1183 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1184 uint16_t nr_vec = 0; 1185 1186 if (unlikely(reserve_avail_buf_split(dev, vq, 1187 pkt_len, buf_vec, &num_buffers, 1188 avail_head, &nr_vec) < 0)) { 1189 VHOST_LOG_DATA(DEBUG, 1190 "(%d) failed to get enough desc from vring\n", 1191 dev->vid); 1192 vq->shadow_used_idx -= num_buffers; 1193 break; 1194 } 1195 1196 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1197 dev->vid, vq->last_avail_idx, 1198 vq->last_avail_idx + num_buffers); 1199 1200 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], 1201 buf_vec, nr_vec, 1202 num_buffers) < 0) { 1203 vq->shadow_used_idx -= num_buffers; 1204 break; 1205 } 1206 1207 vq->last_avail_idx += num_buffers; 1208 } 1209 1210 do_data_copy_enqueue(dev, vq); 1211 1212 if (likely(vq->shadow_used_idx)) { 1213 flush_shadow_used_ring_split(dev, vq); 1214 vhost_vring_call_split(dev, vq); 1215 } 1216 1217 return pkt_idx; 1218 } 1219 1220 static __rte_always_inline int 1221 virtio_dev_rx_sync_batch_check(struct virtio_net *dev, 1222 struct vhost_virtqueue *vq, 1223 struct rte_mbuf **pkts, 1224 uint64_t *desc_addrs, 1225 uint64_t *lens) 1226 { 1227 bool wrap_counter = vq->avail_wrap_counter; 1228 struct vring_packed_desc *descs = vq->desc_packed; 1229 uint16_t avail_idx = vq->last_avail_idx; 1230 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1231 uint16_t i; 1232 1233 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 1234 return -1; 1235 1236 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 1237 return -1; 1238 1239 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1240 if (unlikely(pkts[i]->next != NULL)) 1241 return -1; 1242 if (unlikely(!desc_is_avail(&descs[avail_idx + i], 1243 wrap_counter))) 1244 return -1; 1245 } 1246 1247 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1248 lens[i] = descs[avail_idx + i].len; 1249 1250 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1251 if 
(unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) 1252 return -1; 1253 } 1254 1255 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1256 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 1257 descs[avail_idx + i].addr, 1258 &lens[i], 1259 VHOST_ACCESS_RW); 1260 1261 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1262 if (unlikely(!desc_addrs[i])) 1263 return -1; 1264 if (unlikely(lens[i] != descs[avail_idx + i].len)) 1265 return -1; 1266 } 1267 1268 return 0; 1269 } 1270 1271 static __rte_always_inline void 1272 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev, 1273 struct vhost_virtqueue *vq, 1274 struct rte_mbuf **pkts, 1275 uint64_t *desc_addrs, 1276 uint64_t *lens) 1277 { 1278 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1279 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; 1280 struct vring_packed_desc *descs = vq->desc_packed; 1281 uint16_t avail_idx = vq->last_avail_idx; 1282 uint16_t ids[PACKED_BATCH_SIZE]; 1283 uint16_t i; 1284 1285 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1286 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 1287 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) 1288 (uintptr_t)desc_addrs[i]; 1289 lens[i] = pkts[i]->pkt_len + 1290 sizeof(struct virtio_net_hdr_mrg_rxbuf); 1291 } 1292 1293 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1294 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); 1295 1296 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 1297 1298 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 1299 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), 1300 rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 1301 pkts[i]->pkt_len); 1302 } 1303 1304 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1305 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, 1306 lens[i]); 1307 1308 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 1309 ids[i] = descs[avail_idx + i].id; 1310 1311 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); 1312 } 1313 1314 static __rte_always_inline int 1315 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev, 1316 struct vhost_virtqueue *vq, 1317 struct rte_mbuf **pkts) 1318 { 1319 uint64_t desc_addrs[PACKED_BATCH_SIZE]; 1320 uint64_t lens[PACKED_BATCH_SIZE]; 1321 1322 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1) 1323 return -1; 1324 1325 if (vq->shadow_used_idx) { 1326 do_data_copy_enqueue(dev, vq); 1327 vhost_flush_enqueue_shadow_packed(dev, vq); 1328 } 1329 1330 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens); 1331 1332 return 0; 1333 } 1334 1335 static __rte_always_inline int16_t 1336 virtio_dev_rx_single_packed(struct virtio_net *dev, 1337 struct vhost_virtqueue *vq, 1338 struct rte_mbuf *pkt) 1339 { 1340 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1341 uint16_t nr_descs = 0; 1342 1343 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, 1344 &nr_descs) < 0)) { 1345 VHOST_LOG_DATA(DEBUG, 1346 "(%d) failed to get enough desc from vring\n", 1347 dev->vid); 1348 return -1; 1349 } 1350 1351 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1352 dev->vid, vq->last_avail_idx, 1353 vq->last_avail_idx + nr_descs); 1354 1355 vq_inc_last_avail_packed(vq, nr_descs); 1356 1357 return 0; 1358 } 1359 1360 static __rte_noinline uint32_t 1361 virtio_dev_rx_packed(struct virtio_net *dev, 1362 struct vhost_virtqueue *__rte_restrict vq, 1363 struct rte_mbuf **__rte_restrict pkts, 1364 uint32_t count) 1365 { 1366 uint32_t pkt_idx = 0; 1367 1368 do { 1369 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1370 
1371 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 1372 if (!virtio_dev_rx_sync_batch_packed(dev, vq, 1373 &pkts[pkt_idx])) { 1374 pkt_idx += PACKED_BATCH_SIZE; 1375 continue; 1376 } 1377 } 1378 1379 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) 1380 break; 1381 pkt_idx++; 1382 1383 } while (pkt_idx < count); 1384 1385 if (vq->shadow_used_idx) { 1386 do_data_copy_enqueue(dev, vq); 1387 vhost_flush_enqueue_shadow_packed(dev, vq); 1388 } 1389 1390 if (pkt_idx) 1391 vhost_vring_call_packed(dev, vq); 1392 1393 return pkt_idx; 1394 } 1395 1396 static __rte_always_inline uint32_t 1397 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, 1398 struct rte_mbuf **pkts, uint32_t count) 1399 { 1400 struct vhost_virtqueue *vq; 1401 uint32_t nb_tx = 0; 1402 1403 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 1404 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 1405 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 1406 dev->vid, __func__, queue_id); 1407 return 0; 1408 } 1409 1410 vq = dev->virtqueue[queue_id]; 1411 1412 rte_spinlock_lock(&vq->access_lock); 1413 1414 if (unlikely(!vq->enabled)) 1415 goto out_access_unlock; 1416 1417 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1418 vhost_user_iotlb_rd_lock(vq); 1419 1420 if (unlikely(!vq->access_ok)) 1421 if (unlikely(vring_translate(dev, vq) < 0)) 1422 goto out; 1423 1424 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1425 if (count == 0) 1426 goto out; 1427 1428 if (vq_is_packed(dev)) 1429 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); 1430 else 1431 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); 1432 1433 out: 1434 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1435 vhost_user_iotlb_rd_unlock(vq); 1436 1437 out_access_unlock: 1438 rte_spinlock_unlock(&vq->access_lock); 1439 1440 return nb_tx; 1441 } 1442 1443 uint16_t 1444 rte_vhost_enqueue_burst(int vid, uint16_t queue_id, 1445 struct rte_mbuf **__rte_restrict pkts, uint16_t count) 1446 { 1447 struct virtio_net *dev = get_device(vid); 1448 1449 if (!dev) 1450 return 0; 1451 1452 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 1453 VHOST_LOG_DATA(ERR, 1454 "(%d) %s: built-in vhost net backend is disabled.\n", 1455 dev->vid, __func__); 1456 return 0; 1457 } 1458 1459 return virtio_dev_rx(dev, queue_id, pkts, count); 1460 } 1461 1462 static __rte_always_inline uint16_t 1463 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx, 1464 uint16_t vq_size, uint16_t n_inflight) 1465 { 1466 return pkts_idx > n_inflight ? 
(pkts_idx - n_inflight) : 1467 (vq_size - n_inflight + pkts_idx) % vq_size; 1468 } 1469 1470 static __rte_always_inline void 1471 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring, 1472 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1473 { 1474 size_t elem_size = sizeof(struct vring_used_elem); 1475 1476 if (d_idx + count <= ring_size) { 1477 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1478 } else { 1479 uint16_t size = ring_size - d_idx; 1480 1481 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1482 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1483 } 1484 } 1485 1486 static __rte_always_inline void 1487 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, 1488 struct vring_used_elem_packed *d_ring, 1489 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count) 1490 { 1491 size_t elem_size = sizeof(struct vring_used_elem_packed); 1492 1493 if (d_idx + count <= ring_size) { 1494 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size); 1495 } else { 1496 uint16_t size = ring_size - d_idx; 1497 1498 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size); 1499 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size); 1500 } 1501 } 1502 1503 static __rte_noinline uint32_t 1504 virtio_dev_rx_async_submit_split(struct virtio_net *dev, 1505 struct vhost_virtqueue *vq, uint16_t queue_id, 1506 struct rte_mbuf **pkts, uint32_t count) 1507 { 1508 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1509 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1510 uint16_t num_buffers; 1511 uint16_t avail_head; 1512 1513 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1514 struct iovec *vec_pool = vq->vec_pool; 1515 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1516 struct iovec *src_iovec = vec_pool; 1517 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1518 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1519 uint32_t n_pkts = 0, pkt_err = 0; 1520 int32_t n_xfer; 1521 uint16_t segs_await = 0; 1522 uint16_t iovec_idx = 0, it_idx = 0, slot_idx = 0; 1523 1524 /* 1525 * The ordering between avail index and desc reads need to be enforced. 
1526 */ 1527 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); 1528 1529 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 1530 1531 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1532 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; 1533 uint16_t nr_vec = 0; 1534 1535 if (unlikely(reserve_avail_buf_split(dev, vq, 1536 pkt_len, buf_vec, &num_buffers, 1537 avail_head, &nr_vec) < 0)) { 1538 VHOST_LOG_DATA(DEBUG, 1539 "(%d) failed to get enough desc from vring\n", 1540 dev->vid); 1541 vq->shadow_used_idx -= num_buffers; 1542 break; 1543 } 1544 1545 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1546 dev->vid, vq->last_avail_idx, 1547 vq->last_avail_idx + num_buffers); 1548 1549 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, 1550 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1551 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) { 1552 vq->shadow_used_idx -= num_buffers; 1553 break; 1554 } 1555 1556 async_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx], 1557 &it_pool[it_idx + 1]); 1558 1559 slot_idx = (vq->async_pkts_idx + pkt_idx) & (vq->size - 1); 1560 pkts_info[slot_idx].descs = num_buffers; 1561 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1562 1563 iovec_idx += it_pool[it_idx].nr_segs; 1564 segs_await += it_pool[it_idx].nr_segs; 1565 it_idx += 2; 1566 1567 vq->last_avail_idx += num_buffers; 1568 1569 /* 1570 * conditions to trigger async device transfer: 1571 * - buffered packet number reaches transfer threshold 1572 * - unused async iov number is less than max vhost vector 1573 */ 1574 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1575 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < 1576 BUF_VECTOR_MAX))) { 1577 n_xfer = vq->async_ops.transfer_data(dev->vid, 1578 queue_id, tdes, 0, pkt_burst_idx); 1579 if (likely(n_xfer >= 0)) { 1580 n_pkts = n_xfer; 1581 } else { 1582 VHOST_LOG_DATA(ERR, 1583 "(%d) %s: failed to transfer data for queue id %d.\n", 1584 dev->vid, __func__, queue_id); 1585 n_pkts = 0; 1586 } 1587 1588 iovec_idx = 0; 1589 it_idx = 0; 1590 segs_await = 0; 1591 1592 if (unlikely(n_pkts < pkt_burst_idx)) { 1593 /* 1594 * log error packets number here and do actual 1595 * error processing when applications poll 1596 * completion 1597 */ 1598 pkt_err = pkt_burst_idx - n_pkts; 1599 pkt_idx++; 1600 pkt_burst_idx = 0; 1601 break; 1602 } 1603 1604 pkt_burst_idx = 0; 1605 } 1606 } 1607 1608 if (pkt_burst_idx) { 1609 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 1610 if (likely(n_xfer >= 0)) { 1611 n_pkts = n_xfer; 1612 } else { 1613 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n", 1614 dev->vid, __func__, queue_id); 1615 n_pkts = 0; 1616 } 1617 1618 if (unlikely(n_pkts < pkt_burst_idx)) 1619 pkt_err = pkt_burst_idx - n_pkts; 1620 } 1621 1622 if (unlikely(pkt_err)) { 1623 uint16_t num_descs = 0; 1624 1625 /* update number of completed packets */ 1626 pkt_idx -= pkt_err; 1627 1628 /* calculate the sum of descriptors to revert */ 1629 while (pkt_err-- > 0) { 1630 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs; 1631 slot_idx--; 1632 } 1633 1634 /* recover shadow used ring and available ring */ 1635 vq->shadow_used_idx -= num_descs; 1636 vq->last_avail_idx -= num_descs; 1637 } 1638 1639 /* keep used descriptors */ 1640 if (likely(vq->shadow_used_idx)) { 1641 uint16_t to = vq->async_desc_idx_split & (vq->size - 1); 1642 1643 store_dma_desc_info_split(vq->shadow_used_split, 1644 vq->async_descs_split, vq->size, 0, to, 1645 
vq->shadow_used_idx); 1646 1647 vq->async_desc_idx_split += vq->shadow_used_idx; 1648 vq->async_pkts_idx += pkt_idx; 1649 vq->async_pkts_inflight_n += pkt_idx; 1650 vq->shadow_used_idx = 0; 1651 } 1652 1653 return pkt_idx; 1654 } 1655 1656 static __rte_always_inline void 1657 vhost_update_used_packed(struct vhost_virtqueue *vq, 1658 struct vring_used_elem_packed *shadow_ring, 1659 uint16_t count) 1660 { 1661 int i; 1662 uint16_t used_idx = vq->last_used_idx; 1663 uint16_t head_idx = vq->last_used_idx; 1664 uint16_t head_flags = 0; 1665 1666 if (count == 0) 1667 return; 1668 1669 /* Split loop in two to save memory barriers */ 1670 for (i = 0; i < count; i++) { 1671 vq->desc_packed[used_idx].id = shadow_ring[i].id; 1672 vq->desc_packed[used_idx].len = shadow_ring[i].len; 1673 1674 used_idx += shadow_ring[i].count; 1675 if (used_idx >= vq->size) 1676 used_idx -= vq->size; 1677 } 1678 1679 /* The ordering for storing desc flags needs to be enforced. */ 1680 rte_atomic_thread_fence(__ATOMIC_RELEASE); 1681 1682 for (i = 0; i < count; i++) { 1683 uint16_t flags; 1684 1685 if (vq->shadow_used_packed[i].len) 1686 flags = VRING_DESC_F_WRITE; 1687 else 1688 flags = 0; 1689 1690 if (vq->used_wrap_counter) { 1691 flags |= VRING_DESC_F_USED; 1692 flags |= VRING_DESC_F_AVAIL; 1693 } else { 1694 flags &= ~VRING_DESC_F_USED; 1695 flags &= ~VRING_DESC_F_AVAIL; 1696 } 1697 1698 if (i > 0) { 1699 vq->desc_packed[vq->last_used_idx].flags = flags; 1700 } else { 1701 head_idx = vq->last_used_idx; 1702 head_flags = flags; 1703 } 1704 1705 vq_inc_last_used_packed(vq, shadow_ring[i].count); 1706 } 1707 1708 vq->desc_packed[head_idx].flags = head_flags; 1709 } 1710 1711 static __rte_always_inline int 1712 vhost_enqueue_async_packed(struct virtio_net *dev, 1713 struct vhost_virtqueue *vq, 1714 struct rte_mbuf *pkt, 1715 struct buf_vector *buf_vec, 1716 uint16_t *nr_descs, 1717 uint16_t *nr_buffers, 1718 struct iovec *src_iovec, struct iovec *dst_iovec, 1719 struct rte_vhost_iov_iter *src_it, 1720 struct rte_vhost_iov_iter *dst_it) 1721 { 1722 uint16_t nr_vec = 0; 1723 uint16_t avail_idx = vq->last_avail_idx; 1724 uint16_t max_tries, tries = 0; 1725 uint16_t buf_id = 0; 1726 uint32_t len = 0; 1727 uint16_t desc_count = 0; 1728 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf); 1729 uint32_t buffer_len[vq->size]; 1730 uint16_t buffer_buf_id[vq->size]; 1731 uint16_t buffer_desc_count[vq->size]; 1732 1733 if (rxvq_is_mergeable(dev)) 1734 max_tries = vq->size - 1; 1735 else 1736 max_tries = 1; 1737 1738 while (size > 0) { 1739 /* 1740 * if we tried all available ring items, and still 1741 * can't get enough buf, it means something abnormal 1742 * happened. 
1743 */ 1744 if (unlikely(++tries > max_tries)) 1745 return -1; 1746 1747 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec, 1748 &buf_id, &len, VHOST_ACCESS_RW) < 0)) 1749 return -1; 1750 1751 len = RTE_MIN(len, size); 1752 size -= len; 1753 1754 buffer_len[*nr_buffers] = len; 1755 buffer_buf_id[*nr_buffers] = buf_id; 1756 buffer_desc_count[*nr_buffers] = desc_count; 1757 *nr_buffers += 1; 1758 *nr_descs += desc_count; 1759 avail_idx += desc_count; 1760 if (avail_idx >= vq->size) 1761 avail_idx -= vq->size; 1762 } 1763 1764 if (unlikely(async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, 1765 *nr_buffers, src_iovec, dst_iovec, 1766 src_it, dst_it) < 0)) 1767 return -1; 1768 1769 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers); 1770 1771 return 0; 1772 } 1773 1774 static __rte_always_inline int16_t 1775 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, 1776 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers, 1777 struct iovec *src_iovec, struct iovec *dst_iovec, 1778 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it) 1779 { 1780 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 1781 1782 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers, 1783 src_iovec, dst_iovec, 1784 src_it, dst_it) < 0)) { 1785 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); 1786 return -1; 1787 } 1788 1789 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", 1790 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs); 1791 1792 return 0; 1793 } 1794 1795 static __rte_always_inline void 1796 dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, 1797 uint32_t nr_err, uint32_t *pkt_idx) 1798 { 1799 uint16_t descs_err = 0; 1800 uint16_t buffers_err = 0; 1801 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1802 1803 *pkt_idx -= nr_err; 1804 /* calculate the sum of buffers and descs of DMA-error packets. 
*/ 1805 while (nr_err-- > 0) { 1806 descs_err += pkts_info[slot_idx % vq->size].descs; 1807 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers; 1808 slot_idx--; 1809 } 1810 1811 if (vq->last_avail_idx >= descs_err) { 1812 vq->last_avail_idx -= descs_err; 1813 } else { 1814 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err; 1815 vq->avail_wrap_counter ^= 1; 1816 } 1817 1818 vq->shadow_used_idx -= buffers_err; 1819 } 1820 1821 static __rte_noinline uint32_t 1822 virtio_dev_rx_async_submit_packed(struct virtio_net *dev, 1823 struct vhost_virtqueue *vq, uint16_t queue_id, 1824 struct rte_mbuf **pkts, uint32_t count) 1825 { 1826 uint32_t pkt_idx = 0, pkt_burst_idx = 0; 1827 uint32_t remained = count; 1828 int32_t n_xfer; 1829 uint16_t num_buffers; 1830 uint16_t num_descs; 1831 1832 struct rte_vhost_iov_iter *it_pool = vq->it_pool; 1833 struct iovec *vec_pool = vq->vec_pool; 1834 struct rte_vhost_async_desc tdes[MAX_PKT_BURST]; 1835 struct iovec *src_iovec = vec_pool; 1836 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1); 1837 struct async_inflight_info *pkts_info = vq->async_pkts_info; 1838 uint32_t n_pkts = 0, pkt_err = 0; 1839 uint16_t slot_idx = 0; 1840 uint16_t segs_await = 0; 1841 uint16_t iovec_idx = 0, it_idx = 0; 1842 1843 do { 1844 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 1845 1846 num_buffers = 0; 1847 num_descs = 0; 1848 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx], 1849 &num_descs, &num_buffers, 1850 &src_iovec[iovec_idx], &dst_iovec[iovec_idx], 1851 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0)) 1852 break; 1853 1854 slot_idx = (vq->async_pkts_idx + pkt_idx) % vq->size; 1855 1856 async_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx], 1857 &it_pool[it_idx + 1]); 1858 pkts_info[slot_idx].descs = num_descs; 1859 pkts_info[slot_idx].nr_buffers = num_buffers; 1860 pkts_info[slot_idx].mbuf = pkts[pkt_idx]; 1861 iovec_idx += it_pool[it_idx].nr_segs; 1862 segs_await += it_pool[it_idx].nr_segs; 1863 it_idx += 2; 1864 1865 pkt_idx++; 1866 remained--; 1867 vq_inc_last_avail_packed(vq, num_descs); 1868 1869 /* 1870 * conditions to trigger async device transfer: 1871 * - buffered packet number reaches transfer threshold 1872 * - unused async iov number is less than max vhost vector 1873 */ 1874 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD || 1875 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) { 1876 n_xfer = vq->async_ops.transfer_data(dev->vid, 1877 queue_id, tdes, 0, pkt_burst_idx); 1878 if (likely(n_xfer >= 0)) { 1879 n_pkts = n_xfer; 1880 } else { 1881 VHOST_LOG_DATA(ERR, 1882 "(%d) %s: failed to transfer data for queue id %d.\n", 1883 dev->vid, __func__, queue_id); 1884 n_pkts = 0; 1885 } 1886 1887 iovec_idx = 0; 1888 it_idx = 0; 1889 segs_await = 0; 1890 1891 if (unlikely(n_pkts < pkt_burst_idx)) { 1892 /* 1893 * log error packets number here and do actual 1894 * error processing when applications poll 1895 * completion 1896 */ 1897 pkt_err = pkt_burst_idx - n_pkts; 1898 pkt_burst_idx = 0; 1899 break; 1900 } 1901 1902 pkt_burst_idx = 0; 1903 } 1904 } while (pkt_idx < count); 1905 1906 if (pkt_burst_idx) { 1907 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); 1908 if (likely(n_xfer >= 0)) { 1909 n_pkts = n_xfer; 1910 } else { 1911 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n", 1912 dev->vid, __func__, queue_id); 1913 n_pkts = 0; 1914 } 1915 1916 if (unlikely(n_pkts < pkt_burst_idx)) 1917 pkt_err = pkt_burst_idx - n_pkts; 1918 } 1919 1920 
if (unlikely(pkt_err)) 1921 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); 1922 1923 if (likely(vq->shadow_used_idx)) { 1924 /* keep used descriptors. */ 1925 store_dma_desc_info_packed(vq->shadow_used_packed, vq->async_buffers_packed, 1926 vq->size, 0, vq->async_buffer_idx_packed, 1927 vq->shadow_used_idx); 1928 1929 vq->async_buffer_idx_packed += vq->shadow_used_idx; 1930 if (vq->async_buffer_idx_packed >= vq->size) 1931 vq->async_buffer_idx_packed -= vq->size; 1932 1933 vq->async_pkts_idx += pkt_idx; 1934 if (vq->async_pkts_idx >= vq->size) 1935 vq->async_pkts_idx -= vq->size; 1936 1937 vq->shadow_used_idx = 0; 1938 vq->async_pkts_inflight_n += pkt_idx; 1939 } 1940 1941 return pkt_idx; 1942 } 1943 1944 static __rte_always_inline void 1945 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) 1946 { 1947 uint16_t nr_left = n_descs; 1948 uint16_t nr_copy; 1949 uint16_t to, from; 1950 1951 do { 1952 from = vq->last_async_desc_idx_split & (vq->size - 1); 1953 nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from; 1954 to = vq->last_used_idx & (vq->size - 1); 1955 1956 if (to + nr_copy <= vq->size) { 1957 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 1958 nr_copy * sizeof(struct vring_used_elem)); 1959 } else { 1960 uint16_t size = vq->size - to; 1961 1962 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from], 1963 size * sizeof(struct vring_used_elem)); 1964 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size], 1965 (nr_copy - size) * sizeof(struct vring_used_elem)); 1966 } 1967 1968 vq->last_async_desc_idx_split += nr_copy; 1969 vq->last_used_idx += nr_copy; 1970 nr_left -= nr_copy; 1971 } while (nr_left > 0); 1972 } 1973 1974 static __rte_always_inline void 1975 write_back_completed_descs_packed(struct vhost_virtqueue *vq, 1976 uint16_t n_buffers) 1977 { 1978 uint16_t nr_left = n_buffers; 1979 uint16_t from, to; 1980 1981 do { 1982 from = vq->last_async_buffer_idx_packed; 1983 to = (from + nr_left) % vq->size; 1984 if (to > from) { 1985 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from); 1986 vq->last_async_buffer_idx_packed += nr_left; 1987 nr_left = 0; 1988 } else { 1989 vhost_update_used_packed(vq, vq->async_buffers_packed + from, 1990 vq->size - from); 1991 vq->last_async_buffer_idx_packed = 0; 1992 nr_left -= vq->size - from; 1993 } 1994 } while (nr_left > 0); 1995 } 1996 1997 static __rte_always_inline uint16_t 1998 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, 1999 struct rte_mbuf **pkts, uint16_t count) 2000 { 2001 struct vhost_virtqueue *vq; 2002 struct async_inflight_info *pkts_info; 2003 int32_t n_cpl; 2004 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0; 2005 uint16_t start_idx, pkts_idx, vq_size; 2006 uint16_t from, i; 2007 2008 vq = dev->virtqueue[queue_id]; 2009 pkts_idx = vq->async_pkts_idx % vq->size; 2010 pkts_info = vq->async_pkts_info; 2011 vq_size = vq->size; 2012 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx, 2013 vq_size, vq->async_pkts_inflight_n); 2014 2015 if (count > vq->async_last_pkts_n) { 2016 n_cpl = vq->async_ops.check_completed_copies(dev->vid, 2017 queue_id, 0, count - vq->async_last_pkts_n); 2018 if (likely(n_cpl >= 0)) { 2019 n_pkts_cpl = n_cpl; 2020 } else { 2021 VHOST_LOG_DATA(ERR, 2022 "(%d) %s: failed to check completed copies for queue id %d.\n", 2023 dev->vid, __func__, queue_id); 2024 n_pkts_cpl = 0; 2025 } 2026 } 2027 2028 n_pkts_cpl += vq->async_last_pkts_n; 2029 n_pkts_put = 
RTE_MIN(n_pkts_cpl, count); 2030 if (unlikely(n_pkts_put == 0)) { 2031 vq->async_last_pkts_n = n_pkts_cpl; 2032 return 0; 2033 } 2034 2035 if (vq_is_packed(dev)) { 2036 for (i = 0; i < n_pkts_put; i++) { 2037 from = (start_idx + i) % vq_size; 2038 n_buffers += pkts_info[from].nr_buffers; 2039 pkts[i] = pkts_info[from].mbuf; 2040 } 2041 } else { 2042 for (i = 0; i < n_pkts_put; i++) { 2043 from = (start_idx + i) & (vq_size - 1); 2044 n_descs += pkts_info[from].descs; 2045 pkts[i] = pkts_info[from].mbuf; 2046 } 2047 } 2048 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put; 2049 vq->async_pkts_inflight_n -= n_pkts_put; 2050 2051 if (likely(vq->enabled && vq->access_ok)) { 2052 if (vq_is_packed(dev)) { 2053 write_back_completed_descs_packed(vq, n_buffers); 2054 2055 vhost_vring_call_packed(dev, vq); 2056 } else { 2057 write_back_completed_descs_split(vq, n_descs); 2058 2059 __atomic_add_fetch(&vq->used->idx, n_descs, 2060 __ATOMIC_RELEASE); 2061 vhost_vring_call_split(dev, vq); 2062 } 2063 } else { 2064 if (vq_is_packed(dev)) { 2065 vq->last_async_buffer_idx_packed += n_buffers; 2066 if (vq->last_async_buffer_idx_packed >= vq->size) 2067 vq->last_async_buffer_idx_packed -= vq->size; 2068 } else { 2069 vq->last_async_desc_idx_split += n_descs; 2070 } 2071 } 2072 2073 return n_pkts_put; 2074 } 2075 2076 uint16_t 2077 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, 2078 struct rte_mbuf **pkts, uint16_t count) 2079 { 2080 struct virtio_net *dev = get_device(vid); 2081 struct vhost_virtqueue *vq; 2082 uint16_t n_pkts_cpl = 0; 2083 2084 if (unlikely(!dev)) 2085 return 0; 2086 2087 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2088 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2089 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2090 dev->vid, __func__, queue_id); 2091 return 0; 2092 } 2093 2094 vq = dev->virtqueue[queue_id]; 2095 2096 if (unlikely(!vq->async_registered)) { 2097 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2098 dev->vid, __func__, queue_id); 2099 return 0; 2100 } 2101 2102 rte_spinlock_lock(&vq->access_lock); 2103 2104 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count); 2105 2106 rte_spinlock_unlock(&vq->access_lock); 2107 2108 return n_pkts_cpl; 2109 } 2110 2111 uint16_t 2112 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, 2113 struct rte_mbuf **pkts, uint16_t count) 2114 { 2115 struct virtio_net *dev = get_device(vid); 2116 struct vhost_virtqueue *vq; 2117 uint16_t n_pkts_cpl = 0; 2118 2119 if (!dev) 2120 return 0; 2121 2122 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2123 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { 2124 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2125 dev->vid, __func__, queue_id); 2126 return 0; 2127 } 2128 2129 vq = dev->virtqueue[queue_id]; 2130 2131 if (unlikely(!vq->async_registered)) { 2132 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n", 2133 dev->vid, __func__, queue_id); 2134 return 0; 2135 } 2136 2137 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count); 2138 2139 return n_pkts_cpl; 2140 } 2141 2142 static __rte_always_inline uint32_t 2143 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 2144 struct rte_mbuf **pkts, uint32_t count) 2145 { 2146 struct vhost_virtqueue *vq; 2147 uint32_t nb_tx = 0; 2148 2149 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2150 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, 
dev->nr_vring))) { 2151 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 2152 dev->vid, __func__, queue_id); 2153 return 0; 2154 } 2155 2156 vq = dev->virtqueue[queue_id]; 2157 2158 rte_spinlock_lock(&vq->access_lock); 2159 2160 if (unlikely(!vq->enabled || !vq->async_registered)) 2161 goto out_access_unlock; 2162 2163 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2164 vhost_user_iotlb_rd_lock(vq); 2165 2166 if (unlikely(!vq->access_ok)) 2167 if (unlikely(vring_translate(dev, vq) < 0)) 2168 goto out; 2169 2170 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 2171 if (count == 0) 2172 goto out; 2173 2174 if (vq_is_packed(dev)) 2175 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, 2176 pkts, count); 2177 else 2178 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, 2179 pkts, count); 2180 2181 out: 2182 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 2183 vhost_user_iotlb_rd_unlock(vq); 2184 2185 out_access_unlock: 2186 rte_spinlock_unlock(&vq->access_lock); 2187 2188 return nb_tx; 2189 } 2190 2191 uint16_t 2192 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, 2193 struct rte_mbuf **pkts, uint16_t count) 2194 { 2195 struct virtio_net *dev = get_device(vid); 2196 2197 if (!dev) 2198 return 0; 2199 2200 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 2201 VHOST_LOG_DATA(ERR, 2202 "(%d) %s: built-in vhost net backend is disabled.\n", 2203 dev->vid, __func__); 2204 return 0; 2205 } 2206 2207 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count); 2208 } 2209 2210 static inline bool 2211 virtio_net_with_host_offload(struct virtio_net *dev) 2212 { 2213 if (dev->features & 2214 ((1ULL << VIRTIO_NET_F_CSUM) | 2215 (1ULL << VIRTIO_NET_F_HOST_ECN) | 2216 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 2217 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 2218 (1ULL << VIRTIO_NET_F_HOST_UFO))) 2219 return true; 2220 2221 return false; 2222 } 2223 2224 static int 2225 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) 2226 { 2227 struct rte_ipv4_hdr *ipv4_hdr; 2228 struct rte_ipv6_hdr *ipv6_hdr; 2229 struct rte_ether_hdr *eth_hdr; 2230 uint16_t ethertype; 2231 uint16_t data_len = rte_pktmbuf_data_len(m); 2232 2233 if (data_len < sizeof(struct rte_ether_hdr)) 2234 return -EINVAL; 2235 2236 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 2237 2238 m->l2_len = sizeof(struct rte_ether_hdr); 2239 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); 2240 2241 if (ethertype == RTE_ETHER_TYPE_VLAN) { 2242 if (data_len < sizeof(struct rte_ether_hdr) + 2243 sizeof(struct rte_vlan_hdr)) 2244 goto error; 2245 2246 struct rte_vlan_hdr *vlan_hdr = 2247 (struct rte_vlan_hdr *)(eth_hdr + 1); 2248 2249 m->l2_len += sizeof(struct rte_vlan_hdr); 2250 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); 2251 } 2252 2253 switch (ethertype) { 2254 case RTE_ETHER_TYPE_IPV4: 2255 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) 2256 goto error; 2257 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 2258 m->l2_len); 2259 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); 2260 if (data_len < m->l2_len + m->l3_len) 2261 goto error; 2262 m->ol_flags |= RTE_MBUF_F_TX_IPV4; 2263 *l4_proto = ipv4_hdr->next_proto_id; 2264 break; 2265 case RTE_ETHER_TYPE_IPV6: 2266 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) 2267 goto error; 2268 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 2269 m->l2_len); 2270 m->l3_len = sizeof(struct rte_ipv6_hdr); 2271 m->ol_flags |= RTE_MBUF_F_TX_IPV6; 2272 *l4_proto = ipv6_hdr->proto; 2273 break; 2274 default: 2275 /* a 
valid L3 header is needed for further L4 parsing */ 2276 goto error; 2277 } 2278 2279 /* both CSUM and GSO need a valid L4 header */ 2280 switch (*l4_proto) { 2281 case IPPROTO_TCP: 2282 if (data_len < m->l2_len + m->l3_len + 2283 sizeof(struct rte_tcp_hdr)) 2284 goto error; 2285 break; 2286 case IPPROTO_UDP: 2287 if (data_len < m->l2_len + m->l3_len + 2288 sizeof(struct rte_udp_hdr)) 2289 goto error; 2290 break; 2291 case IPPROTO_SCTP: 2292 if (data_len < m->l2_len + m->l3_len + 2293 sizeof(struct rte_sctp_hdr)) 2294 goto error; 2295 break; 2296 default: 2297 goto error; 2298 } 2299 2300 return 0; 2301 2302 error: 2303 m->l2_len = 0; 2304 m->l3_len = 0; 2305 m->ol_flags = 0; 2306 return -EINVAL; 2307 } 2308 2309 static __rte_always_inline void 2310 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m) 2311 { 2312 uint8_t l4_proto = 0; 2313 struct rte_tcp_hdr *tcp_hdr = NULL; 2314 uint16_t tcp_len; 2315 uint16_t data_len = rte_pktmbuf_data_len(m); 2316 2317 if (parse_headers(m, &l4_proto) < 0) 2318 return; 2319 2320 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2321 if (hdr->csum_start == (m->l2_len + m->l3_len)) { 2322 switch (hdr->csum_offset) { 2323 case (offsetof(struct rte_tcp_hdr, cksum)): 2324 if (l4_proto != IPPROTO_TCP) 2325 goto error; 2326 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; 2327 break; 2328 case (offsetof(struct rte_udp_hdr, dgram_cksum)): 2329 if (l4_proto != IPPROTO_UDP) 2330 goto error; 2331 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; 2332 break; 2333 case (offsetof(struct rte_sctp_hdr, cksum)): 2334 if (l4_proto != IPPROTO_SCTP) 2335 goto error; 2336 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM; 2337 break; 2338 default: 2339 goto error; 2340 } 2341 } else { 2342 goto error; 2343 } 2344 } 2345 2346 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2347 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2348 case VIRTIO_NET_HDR_GSO_TCPV4: 2349 case VIRTIO_NET_HDR_GSO_TCPV6: 2350 if (l4_proto != IPPROTO_TCP) 2351 goto error; 2352 tcp_hdr = rte_pktmbuf_mtod_offset(m, 2353 struct rte_tcp_hdr *, 2354 m->l2_len + m->l3_len); 2355 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; 2356 if (data_len < m->l2_len + m->l3_len + tcp_len) 2357 goto error; 2358 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 2359 m->tso_segsz = hdr->gso_size; 2360 m->l4_len = tcp_len; 2361 break; 2362 case VIRTIO_NET_HDR_GSO_UDP: 2363 if (l4_proto != IPPROTO_UDP) 2364 goto error; 2365 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG; 2366 m->tso_segsz = hdr->gso_size; 2367 m->l4_len = sizeof(struct rte_udp_hdr); 2368 break; 2369 default: 2370 VHOST_LOG_DATA(WARNING, 2371 "unsupported gso type %u.\n", hdr->gso_type); 2372 goto error; 2373 } 2374 } 2375 return; 2376 2377 error: 2378 m->l2_len = 0; 2379 m->l3_len = 0; 2380 m->ol_flags = 0; 2381 } 2382 2383 static __rte_always_inline void 2384 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m, 2385 bool legacy_ol_flags) 2386 { 2387 struct rte_net_hdr_lens hdr_lens; 2388 int l4_supported = 0; 2389 uint32_t ptype; 2390 2391 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) 2392 return; 2393 2394 if (legacy_ol_flags) { 2395 vhost_dequeue_offload_legacy(hdr, m); 2396 return; 2397 } 2398 2399 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN; 2400 2401 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 2402 m->packet_type = ptype; 2403 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || 2404 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || 2405 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) 2406 l4_supported = 1; 2407 2408 /* 
According to Virtio 1.1 spec, the device only needs to look at 2409 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. 2410 * This differs from the processing incoming packets path where the 2411 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the 2412 * device. 2413 * 2414 * 5.1.6.2.1 Driver Requirements: Packet Transmission 2415 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and 2416 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. 2417 * 2418 * 5.1.6.2.2 Device Requirements: Packet Transmission 2419 * The device MUST ignore flag bits that it does not recognize. 2420 */ 2421 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2422 uint32_t hdrlen; 2423 2424 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; 2425 if (hdr->csum_start <= hdrlen && l4_supported != 0) { 2426 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE; 2427 } else { 2428 /* Unknown proto or tunnel, do sw cksum. We can assume 2429 * the cksum field is in the first segment since the 2430 * buffers we provided to the host are large enough. 2431 * In case of SCTP, this will be wrong since it's a CRC 2432 * but there's nothing we can do. 2433 */ 2434 uint16_t csum = 0, off; 2435 2436 if (rte_raw_cksum_mbuf(m, hdr->csum_start, 2437 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) 2438 return; 2439 if (likely(csum != 0xffff)) 2440 csum = ~csum; 2441 off = hdr->csum_offset + hdr->csum_start; 2442 if (rte_pktmbuf_data_len(m) >= off + 1) 2443 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; 2444 } 2445 } 2446 2447 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2448 if (hdr->gso_size == 0) 2449 return; 2450 2451 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2452 case VIRTIO_NET_HDR_GSO_TCPV4: 2453 case VIRTIO_NET_HDR_GSO_TCPV6: 2454 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) 2455 break; 2456 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2457 m->tso_segsz = hdr->gso_size; 2458 break; 2459 case VIRTIO_NET_HDR_GSO_UDP: 2460 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) 2461 break; 2462 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE; 2463 m->tso_segsz = hdr->gso_size; 2464 break; 2465 default: 2466 break; 2467 } 2468 } 2469 } 2470 2471 static __rte_noinline void 2472 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, 2473 struct buf_vector *buf_vec) 2474 { 2475 uint64_t len; 2476 uint64_t remain = sizeof(struct virtio_net_hdr); 2477 uint64_t src; 2478 uint64_t dst = (uint64_t)(uintptr_t)hdr; 2479 2480 while (remain) { 2481 len = RTE_MIN(remain, buf_vec->buf_len); 2482 src = buf_vec->buf_addr; 2483 rte_memcpy((void *)(uintptr_t)dst, 2484 (void *)(uintptr_t)src, len); 2485 2486 remain -= len; 2487 dst += len; 2488 buf_vec++; 2489 } 2490 } 2491 2492 static __rte_always_inline int 2493 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, 2494 struct buf_vector *buf_vec, uint16_t nr_vec, 2495 struct rte_mbuf *m, struct rte_mempool *mbuf_pool, 2496 bool legacy_ol_flags) 2497 { 2498 uint32_t buf_avail, buf_offset; 2499 uint64_t buf_addr, buf_len; 2500 uint32_t mbuf_avail, mbuf_offset; 2501 uint32_t cpy_len; 2502 struct rte_mbuf *cur = m, *prev = m; 2503 struct virtio_net_hdr tmp_hdr; 2504 struct virtio_net_hdr *hdr = NULL; 2505 /* A counter to avoid desc dead loop chain */ 2506 uint16_t vec_idx = 0; 2507 struct batch_copy_elem *batch_copy = vq->batch_copy_elems; 2508 int error = 0; 2509 2510 buf_addr = buf_vec[vec_idx].buf_addr; 2511 buf_len = buf_vec[vec_idx].buf_len; 2512 2513 if (unlikely(buf_len < dev->vhost_hlen && nr_vec 
<= 1)) { 2514 error = -1; 2515 goto out; 2516 } 2517 2518 if (virtio_net_with_host_offload(dev)) { 2519 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { 2520 /* 2521 * No luck, the virtio-net header doesn't fit 2522 * in a contiguous virtual area. 2523 */ 2524 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); 2525 hdr = &tmp_hdr; 2526 } else { 2527 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); 2528 } 2529 } 2530 2531 /* 2532 * A virtio driver normally uses at least 2 desc buffers 2533 * for Tx: the first for storing the header, and others 2534 * for storing the data. 2535 */ 2536 if (unlikely(buf_len < dev->vhost_hlen)) { 2537 buf_offset = dev->vhost_hlen - buf_len; 2538 vec_idx++; 2539 buf_addr = buf_vec[vec_idx].buf_addr; 2540 buf_len = buf_vec[vec_idx].buf_len; 2541 buf_avail = buf_len - buf_offset; 2542 } else if (buf_len == dev->vhost_hlen) { 2543 if (unlikely(++vec_idx >= nr_vec)) 2544 goto out; 2545 buf_addr = buf_vec[vec_idx].buf_addr; 2546 buf_len = buf_vec[vec_idx].buf_len; 2547 2548 buf_offset = 0; 2549 buf_avail = buf_len; 2550 } else { 2551 buf_offset = dev->vhost_hlen; 2552 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; 2553 } 2554 2555 PRINT_PACKET(dev, 2556 (uintptr_t)(buf_addr + buf_offset), 2557 (uint32_t)buf_avail, 0); 2558 2559 mbuf_offset = 0; 2560 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 2561 while (1) { 2562 cpy_len = RTE_MIN(buf_avail, mbuf_avail); 2563 2564 if (likely(cpy_len > MAX_BATCH_LEN || 2565 vq->batch_copy_nb_elems >= vq->size || 2566 (hdr && cur == m))) { 2567 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 2568 mbuf_offset), 2569 (void *)((uintptr_t)(buf_addr + 2570 buf_offset)), cpy_len); 2571 } else { 2572 batch_copy[vq->batch_copy_nb_elems].dst = 2573 rte_pktmbuf_mtod_offset(cur, void *, 2574 mbuf_offset); 2575 batch_copy[vq->batch_copy_nb_elems].src = 2576 (void *)((uintptr_t)(buf_addr + buf_offset)); 2577 batch_copy[vq->batch_copy_nb_elems].len = cpy_len; 2578 vq->batch_copy_nb_elems++; 2579 } 2580 2581 mbuf_avail -= cpy_len; 2582 mbuf_offset += cpy_len; 2583 buf_avail -= cpy_len; 2584 buf_offset += cpy_len; 2585 2586 /* This buf reaches to its end, get the next one */ 2587 if (buf_avail == 0) { 2588 if (++vec_idx >= nr_vec) 2589 break; 2590 2591 buf_addr = buf_vec[vec_idx].buf_addr; 2592 buf_len = buf_vec[vec_idx].buf_len; 2593 2594 buf_offset = 0; 2595 buf_avail = buf_len; 2596 2597 PRINT_PACKET(dev, (uintptr_t)buf_addr, 2598 (uint32_t)buf_avail, 0); 2599 } 2600 2601 /* 2602 * This mbuf reaches to its end, get a new one 2603 * to hold more data. 
2604 */ 2605 if (mbuf_avail == 0) { 2606 cur = rte_pktmbuf_alloc(mbuf_pool); 2607 if (unlikely(cur == NULL)) { 2608 VHOST_LOG_DATA(ERR, "Failed to " 2609 "allocate memory for mbuf.\n"); 2610 error = -1; 2611 goto out; 2612 } 2613 2614 prev->next = cur; 2615 prev->data_len = mbuf_offset; 2616 m->nb_segs += 1; 2617 m->pkt_len += mbuf_offset; 2618 prev = cur; 2619 2620 mbuf_offset = 0; 2621 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 2622 } 2623 } 2624 2625 prev->data_len = mbuf_offset; 2626 m->pkt_len += mbuf_offset; 2627 2628 if (hdr) 2629 vhost_dequeue_offload(hdr, m, legacy_ol_flags); 2630 2631 out: 2632 2633 return error; 2634 } 2635 2636 static void 2637 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) 2638 { 2639 rte_free(opaque); 2640 } 2641 2642 static int 2643 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) 2644 { 2645 struct rte_mbuf_ext_shared_info *shinfo = NULL; 2646 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; 2647 uint16_t buf_len; 2648 rte_iova_t iova; 2649 void *buf; 2650 2651 total_len += sizeof(*shinfo) + sizeof(uintptr_t); 2652 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); 2653 2654 if (unlikely(total_len > UINT16_MAX)) 2655 return -ENOSPC; 2656 2657 buf_len = total_len; 2658 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); 2659 if (unlikely(buf == NULL)) 2660 return -ENOMEM; 2661 2662 /* Initialize shinfo */ 2663 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, 2664 virtio_dev_extbuf_free, buf); 2665 if (unlikely(shinfo == NULL)) { 2666 rte_free(buf); 2667 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); 2668 return -1; 2669 } 2670 2671 iova = rte_malloc_virt2iova(buf); 2672 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); 2673 rte_pktmbuf_reset_headroom(pkt); 2674 2675 return 0; 2676 } 2677 2678 /* 2679 * Prepare a host supported pktmbuf. 2680 */ 2681 static __rte_always_inline int 2682 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt, 2683 uint32_t data_len) 2684 { 2685 if (rte_pktmbuf_tailroom(pkt) >= data_len) 2686 return 0; 2687 2688 /* attach an external buffer if supported */ 2689 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len)) 2690 return 0; 2691 2692 /* check if chained buffers are allowed */ 2693 if (!dev->linearbuf) 2694 return 0; 2695 2696 return -1; 2697 } 2698 2699 __rte_always_inline 2700 static uint16_t 2701 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, 2702 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 2703 bool legacy_ol_flags) 2704 { 2705 uint16_t i; 2706 uint16_t free_entries; 2707 uint16_t dropped = 0; 2708 static bool allocerr_warned; 2709 2710 /* 2711 * The ordering between avail index and 2712 * desc reads needs to be enforced. 
2713 */ 2714 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - 2715 vq->last_avail_idx; 2716 if (free_entries == 0) 2717 return 0; 2718 2719 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); 2720 2721 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); 2722 2723 count = RTE_MIN(count, MAX_PKT_BURST); 2724 count = RTE_MIN(count, free_entries); 2725 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", 2726 dev->vid, count); 2727 2728 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 2729 return 0; 2730 2731 for (i = 0; i < count; i++) { 2732 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2733 uint16_t head_idx; 2734 uint32_t buf_len; 2735 uint16_t nr_vec = 0; 2736 int err; 2737 2738 if (unlikely(fill_vec_buf_split(dev, vq, 2739 vq->last_avail_idx + i, 2740 &nr_vec, buf_vec, 2741 &head_idx, &buf_len, 2742 VHOST_ACCESS_RO) < 0)) 2743 break; 2744 2745 update_shadow_used_ring_split(vq, head_idx, 0); 2746 2747 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len); 2748 if (unlikely(err)) { 2749 /* 2750 * mbuf allocation fails for jumbo packets when external 2751 * buffer allocation is not allowed and linear buffer 2752 * is required. Drop this packet. 2753 */ 2754 if (!allocerr_warned) { 2755 VHOST_LOG_DATA(ERR, 2756 "Failed mbuf alloc of size %d from %s on %s.\n", 2757 buf_len, mbuf_pool->name, dev->ifname); 2758 allocerr_warned = true; 2759 } 2760 dropped += 1; 2761 i++; 2762 break; 2763 } 2764 2765 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], 2766 mbuf_pool, legacy_ol_flags); 2767 if (unlikely(err)) { 2768 if (!allocerr_warned) { 2769 VHOST_LOG_DATA(ERR, 2770 "Failed to copy desc to mbuf on %s.\n", 2771 dev->ifname); 2772 allocerr_warned = true; 2773 } 2774 dropped += 1; 2775 i++; 2776 break; 2777 } 2778 } 2779 2780 if (dropped) 2781 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1); 2782 2783 vq->last_avail_idx += i; 2784 2785 do_data_copy_dequeue(vq); 2786 if (unlikely(i < count)) 2787 vq->shadow_used_idx = i; 2788 if (likely(vq->shadow_used_idx)) { 2789 flush_shadow_used_ring_split(dev, vq); 2790 vhost_vring_call_split(dev, vq); 2791 } 2792 2793 return (i - dropped); 2794 } 2795 2796 __rte_noinline 2797 static uint16_t 2798 virtio_dev_tx_split_legacy(struct virtio_net *dev, 2799 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2800 struct rte_mbuf **pkts, uint16_t count) 2801 { 2802 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); 2803 } 2804 2805 __rte_noinline 2806 static uint16_t 2807 virtio_dev_tx_split_compliant(struct virtio_net *dev, 2808 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, 2809 struct rte_mbuf **pkts, uint16_t count) 2810 { 2811 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); 2812 } 2813 2814 static __rte_always_inline int 2815 vhost_reserve_avail_batch_packed(struct virtio_net *dev, 2816 struct vhost_virtqueue *vq, 2817 struct rte_mbuf **pkts, 2818 uint16_t avail_idx, 2819 uintptr_t *desc_addrs, 2820 uint16_t *ids) 2821 { 2822 bool wrap = vq->avail_wrap_counter; 2823 struct vring_packed_desc *descs = vq->desc_packed; 2824 uint64_t lens[PACKED_BATCH_SIZE]; 2825 uint64_t buf_lens[PACKED_BATCH_SIZE]; 2826 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2827 uint16_t flags, i; 2828 2829 if (unlikely(avail_idx & PACKED_BATCH_MASK)) 2830 return -1; 2831 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) 2832 return -1; 2833 2834 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2835 flags = descs[avail_idx + i].flags; 2836 if 
(unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || 2837 (wrap == !!(flags & VRING_DESC_F_USED)) || 2838 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) 2839 return -1; 2840 } 2841 2842 rte_atomic_thread_fence(__ATOMIC_ACQUIRE); 2843 2844 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2845 lens[i] = descs[avail_idx + i].len; 2846 2847 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2848 desc_addrs[i] = vhost_iova_to_vva(dev, vq, 2849 descs[avail_idx + i].addr, 2850 &lens[i], VHOST_ACCESS_RW); 2851 } 2852 2853 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2854 if (unlikely(!desc_addrs[i])) 2855 return -1; 2856 if (unlikely((lens[i] != descs[avail_idx + i].len))) 2857 return -1; 2858 } 2859 2860 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2861 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i])) 2862 goto err; 2863 } 2864 2865 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2866 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; 2867 2868 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2869 if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) 2870 goto err; 2871 } 2872 2873 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2874 pkts[i]->pkt_len = lens[i] - buf_offset; 2875 pkts[i]->data_len = pkts[i]->pkt_len; 2876 ids[i] = descs[avail_idx + i].id; 2877 } 2878 2879 return 0; 2880 2881 err: 2882 return -1; 2883 } 2884 2885 static __rte_always_inline int 2886 virtio_dev_tx_batch_packed(struct virtio_net *dev, 2887 struct vhost_virtqueue *vq, 2888 struct rte_mbuf **pkts, 2889 bool legacy_ol_flags) 2890 { 2891 uint16_t avail_idx = vq->last_avail_idx; 2892 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2893 struct virtio_net_hdr *hdr; 2894 uintptr_t desc_addrs[PACKED_BATCH_SIZE]; 2895 uint16_t ids[PACKED_BATCH_SIZE]; 2896 uint16_t i; 2897 2898 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx, 2899 desc_addrs, ids)) 2900 return -1; 2901 2902 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2903 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); 2904 2905 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) 2906 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), 2907 (void *)(uintptr_t)(desc_addrs[i] + buf_offset), 2908 pkts[i]->pkt_len); 2909 2910 if (virtio_net_with_host_offload(dev)) { 2911 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { 2912 hdr = (struct virtio_net_hdr *)(desc_addrs[i]); 2913 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags); 2914 } 2915 } 2916 2917 if (virtio_net_is_inorder(dev)) 2918 vhost_shadow_dequeue_batch_packed_inorder(vq, 2919 ids[PACKED_BATCH_SIZE - 1]); 2920 else 2921 vhost_shadow_dequeue_batch_packed(dev, vq, ids); 2922 2923 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); 2924 2925 return 0; 2926 } 2927 2928 static __rte_always_inline int 2929 vhost_dequeue_single_packed(struct virtio_net *dev, 2930 struct vhost_virtqueue *vq, 2931 struct rte_mempool *mbuf_pool, 2932 struct rte_mbuf *pkts, 2933 uint16_t *buf_id, 2934 uint16_t *desc_count, 2935 bool legacy_ol_flags) 2936 { 2937 struct buf_vector buf_vec[BUF_VECTOR_MAX]; 2938 uint32_t buf_len; 2939 uint16_t nr_vec = 0; 2940 int err; 2941 static bool allocerr_warned; 2942 2943 if (unlikely(fill_vec_buf_packed(dev, vq, 2944 vq->last_avail_idx, desc_count, 2945 buf_vec, &nr_vec, 2946 buf_id, &buf_len, 2947 VHOST_ACCESS_RO) < 0)) 2948 return -1; 2949 2950 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) { 2951 if (!allocerr_warned) { 2952 VHOST_LOG_DATA(ERR, 2953 "Failed mbuf alloc of size %d from %s on %s.\n", 2954 buf_len, 
mbuf_pool->name, dev->ifname); 2955 allocerr_warned = true; 2956 } 2957 return -1; 2958 } 2959 2960 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, 2961 mbuf_pool, legacy_ol_flags); 2962 if (unlikely(err)) { 2963 if (!allocerr_warned) { 2964 VHOST_LOG_DATA(ERR, 2965 "Failed to copy desc to mbuf on %s.\n", 2966 dev->ifname); 2967 allocerr_warned = true; 2968 } 2969 return -1; 2970 } 2971 2972 return 0; 2973 } 2974 2975 static __rte_always_inline int 2976 virtio_dev_tx_single_packed(struct virtio_net *dev, 2977 struct vhost_virtqueue *vq, 2978 struct rte_mempool *mbuf_pool, 2979 struct rte_mbuf *pkts, 2980 bool legacy_ol_flags) 2981 { 2982 2983 uint16_t buf_id, desc_count = 0; 2984 int ret; 2985 2986 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, 2987 &desc_count, legacy_ol_flags); 2988 2989 if (likely(desc_count > 0)) { 2990 if (virtio_net_is_inorder(dev)) 2991 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, 2992 desc_count); 2993 else 2994 vhost_shadow_dequeue_single_packed(vq, buf_id, 2995 desc_count); 2996 2997 vq_inc_last_avail_packed(vq, desc_count); 2998 } 2999 3000 return ret; 3001 } 3002 3003 __rte_always_inline 3004 static uint16_t 3005 virtio_dev_tx_packed(struct virtio_net *dev, 3006 struct vhost_virtqueue *__rte_restrict vq, 3007 struct rte_mempool *mbuf_pool, 3008 struct rte_mbuf **__rte_restrict pkts, 3009 uint32_t count, 3010 bool legacy_ol_flags) 3011 { 3012 uint32_t pkt_idx = 0; 3013 3014 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) 3015 return 0; 3016 3017 do { 3018 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); 3019 3020 if (count - pkt_idx >= PACKED_BATCH_SIZE) { 3021 if (!virtio_dev_tx_batch_packed(dev, vq, 3022 &pkts[pkt_idx], 3023 legacy_ol_flags)) { 3024 pkt_idx += PACKED_BATCH_SIZE; 3025 continue; 3026 } 3027 } 3028 3029 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, 3030 pkts[pkt_idx], 3031 legacy_ol_flags)) 3032 break; 3033 pkt_idx++; 3034 } while (pkt_idx < count); 3035 3036 if (pkt_idx != count) 3037 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx); 3038 3039 if (vq->shadow_used_idx) { 3040 do_data_copy_dequeue(vq); 3041 3042 vhost_flush_dequeue_shadow_packed(dev, vq); 3043 vhost_vring_call_packed(dev, vq); 3044 } 3045 3046 return pkt_idx; 3047 } 3048 3049 __rte_noinline 3050 static uint16_t 3051 virtio_dev_tx_packed_legacy(struct virtio_net *dev, 3052 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3053 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3054 { 3055 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); 3056 } 3057 3058 __rte_noinline 3059 static uint16_t 3060 virtio_dev_tx_packed_compliant(struct virtio_net *dev, 3061 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, 3062 struct rte_mbuf **__rte_restrict pkts, uint32_t count) 3063 { 3064 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); 3065 } 3066 3067 uint16_t 3068 rte_vhost_dequeue_burst(int vid, uint16_t queue_id, 3069 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 3070 { 3071 struct virtio_net *dev; 3072 struct rte_mbuf *rarp_mbuf = NULL; 3073 struct vhost_virtqueue *vq; 3074 int16_t success = 1; 3075 3076 dev = get_device(vid); 3077 if (!dev) 3078 return 0; 3079 3080 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { 3081 VHOST_LOG_DATA(ERR, 3082 "(%d) %s: built-in vhost net backend is disabled.\n", 3083 dev->vid, __func__); 3084 return 0; 3085 } 3086 3087 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, 
dev->nr_vring))) {
3088 VHOST_LOG_DATA(ERR,
3089 "(%d) %s: invalid virtqueue idx %d.\n",
3090 dev->vid, __func__, queue_id);
3091 return 0;
3092 }
3093
3094 vq = dev->virtqueue[queue_id];
3095
3096 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3097 return 0;
3098
3099 if (unlikely(!vq->enabled)) {
3100 count = 0;
3101 goto out_access_unlock;
3102 }
3103
3104 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3105 vhost_user_iotlb_rd_lock(vq);
3106
3107 if (unlikely(!vq->access_ok))
3108 if (unlikely(vring_translate(dev, vq) < 0)) {
3109 count = 0;
3110 goto out;
3111 }
3112
3113 /*
3114 * Construct a RARP broadcast packet and inject it into the "pkts"
3115 * array, so it looks like the guest actually sent such a packet.
3116 *
3117 * Check user_send_rarp() for more information.
3118 *
3119 * broadcast_rarp shares a cacheline in the virtio_net structure
3120 * with some fields that are accessed during enqueue, and
3121 * __atomic_compare_exchange_n performs a write when it executes the
3122 * compare-and-exchange. This could result in false sharing between
3123 * the enqueue and dequeue paths.
3124 *
3125 * Prevent unnecessary false sharing by reading broadcast_rarp first
3126 * and only performing the compare-and-exchange if the read indicates
3127 * it is likely to be set.
3128 */
3129 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3130 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3131 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3132
3133 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3134 if (rarp_mbuf == NULL) {
3135 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
3136 count = 0;
3137 goto out;
3138 }
3139 /*
3140 * Inject it at the head of the "pkts" array, so that the switch's
3141 * MAC learning table gets updated first.
3142 */
3143 pkts[0] = rarp_mbuf;
3144 pkts++;
3145 count -= 1;
3146 }
3147
3148 if (vq_is_packed(dev)) {
3149 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3150 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3151 else
3152 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3153 } else {
3154 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3155 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3156 else
3157 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3158 }
3159
3160 out:
3161 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3162 vhost_user_iotlb_rd_unlock(vq);
3163
3164 out_access_unlock:
3165 rte_spinlock_unlock(&vq->access_lock);
3166
3167 if (unlikely(rarp_mbuf != NULL))
3168 count += 1;
3169
3170 return count;
3171 }
3172
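/*
 * Illustrative sketch (not part of the library): one way an application
 * data-path loop might combine the async enqueue path implemented above
 * with completion polling and the regular dequeue burst. The names
 * "vid", "port" and "mbuf_pool", the burst size of 32, and the use of
 * queue 0 as the guest RX ring and queue 1 as the guest TX ring are
 * assumptions of this example, not values defined in this file.
 *
 *	struct rte_mbuf *pkts[32];
 *	struct rte_mbuf *done[32];
 *	uint16_t n_rx, n_enq, n_done;
 *
 *	// Hand packets received from a NIC to the guest RX ring; the data
 *	// copies are offloaded to the registered async channel.
 *	n_rx = rte_eth_rx_burst(port, 0, pkts, 32);
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, 0, pkts, n_rx);
 *
 *	// Mbufs that were not accepted for transfer stay with the caller.
 *	rte_pktmbuf_free_bulk(&pkts[n_enq], n_rx - n_enq);
 *
 *	// Accepted mbufs must not be freed until their copies complete;
 *	// poll for completions and release the mbufs returned in "done".
 *	n_done = rte_vhost_poll_enqueue_completed(vid, 0, done, 32);
 *	rte_pktmbuf_free_bulk(done, n_done);
 *
 *	// Fetch packets the guest transmitted on its TX ring.
 *	n_done = rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts, 32);
 */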