/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _VIRTQUEUE_H_
#define _VIRTQUEUE_H_

#include <stdint.h>

#include <rte_atomic.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_net.h>

#include "virtio_pci.h"
#include "virtio_ring.h"
#include "virtio_logs.h"
#include "virtio_rxtx.h"

struct rte_mbuf;

#define DEFAULT_TX_FREE_THRESH 32
#define DEFAULT_RX_FREE_THRESH 32

#define VIRTIO_MBUF_BURST_SZ 64

/*
 * Per virtio_ring.h in Linux.
 * For virtio_pci on SMP, we don't need to order with respect to MMIO
 * accesses through relaxed memory I/O windows, so thread_fence is
 * sufficient.
 *
 * For using virtio to talk to real devices (e.g. vDPA) we do need real
 * barriers.
 */
static inline void
virtio_mb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
	else
		rte_mb();
}

static inline void
virtio_rmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
	else
		rte_io_rmb();
}

static inline void
virtio_wmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_RELEASE);
	else
		rte_io_wmb();
}

static inline uint16_t
virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
			     uint8_t weak_barriers)
{
	uint16_t flags;

	if (weak_barriers) {
		/* x86 prefers using rte_io_rmb over __atomic_load_n as it
		 * reports better performance (~1.5%), which comes from the
		 * branch saved by the compiler.
		 * The if and else branches are identical on all platforms
		 * except Arm.
		 */
#ifdef RTE_ARCH_ARM
		flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
#else
		flags = dp->flags;
		rte_io_rmb();
#endif
	} else {
		flags = dp->flags;
		rte_io_rmb();
	}

	return flags;
}

static inline void
virtqueue_store_flags_packed(struct vring_packed_desc *dp,
			     uint16_t flags, uint8_t weak_barriers)
{
	if (weak_barriers) {
		/* x86 prefers using rte_io_wmb over __atomic_store_n as it
		 * reports better performance (~1.5%), which comes from the
		 * branch saved by the compiler.
		 * The if and else branches are identical on all platforms
		 * except Arm.
		 */
#ifdef RTE_ARCH_ARM
		__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
#else
		rte_io_wmb();
		dp->flags = flags;
#endif
	} else {
		rte_io_wmb();
		dp->flags = flags;
	}
}
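/*
 * Illustrative sketch (not part of the driver): the helpers above exist so
 * that a packed descriptor is published only after its payload fields are
 * written. A producer is assumed to follow this pattern:
 *
 *	desc->addr = buf_addr;
 *	desc->len  = buf_len;
 *	desc->id   = cookie_id;
 *	virtqueue_store_flags_packed(desc, flags, hw->weak_barriers);
 *
 * i.e. the flags are stored last, with release semantics. A consumer reads
 * the flags with acquire semantics before trusting the other fields:
 *
 *	flags = virtqueue_fetch_flags_packed(desc, hw->weak_barriers);
 */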
#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while (0)
#endif

#define VIRTQUEUE_MAX_NAME_SZ 32

#ifdef RTE_VIRTIO_USER
/**
 * Return the physical address (or virtual address in case of
 * virtio-user) of mbuf data buffer.
 *
 * The address is first cast to the word size (sizeof(uintptr_t))
 * before casting it to uint64_t. This is to make it work with different
 * combinations of word size (64-bit and 32-bit) and virtio device
 * (virtio-pci and virtio-user).
 */
#define VIRTIO_MBUF_ADDR(mb, vq) \
	((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset)))
#else
#define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova)
#endif

/**
 * Return the physical address (or virtual address in case of
 * virtio-user) of mbuf data buffer, taking care of mbuf data offset
 */
#define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \
	(VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off)
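/*
 * Illustrative sketch (assumption about typical usage): a descriptor for an
 * mbuf data segment is filled with the address/length pair below. With
 * virtio-pci the address is the IOVA of the buffer; with virtio-user it is
 * the process virtual address fetched through (vq)->offset.
 *
 *	desc->addr = VIRTIO_MBUF_DATA_DMA_ADDR(m, vq);
 *	desc->len  = m->data_len;
 */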
#define VTNET_SQ_RQ_QUEUE_IDX 0
#define VTNET_SQ_TQ_QUEUE_IDX 1
#define VTNET_SQ_CQ_QUEUE_IDX 2

enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };

/**
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

/**
 * Control the RX mode, i.e. promiscuous, allmulti, etc...
 * All commands require an "out" sg entry containing a 1 byte
 * state value, zero = disable, non-zero = enable. Commands
 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 */
#define VIRTIO_NET_CTRL_RX              0
#define VIRTIO_NET_CTRL_RX_PROMISC      0
#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
#define VIRTIO_NET_CTRL_RX_ALLUNI       2
#define VIRTIO_NET_CTRL_RX_NOMULTI      3
#define VIRTIO_NET_CTRL_RX_NOUNI        4
#define VIRTIO_NET_CTRL_RX_NOBCAST      5

/**
 * Control the MAC
 *
 * The MAC filter table is managed by the hypervisor, the guest should
 * assume the size is infinite. Filtering should be considered
 * non-perfect, i.e. based on hypervisor resources, the guest may
 * receive packets from sources not specified in the filter list.
 *
 * In addition to the class/cmd header, the TABLE_SET command requires
 * two out scatterlists. Each contains a 4 byte count of entries followed
 * by a concatenated byte stream of the ETH_ALEN MAC addresses. The
 * first sg list contains unicast addresses, the second is for multicast.
 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 * is available.
 *
 * The ADDR_SET command requires one out scatterlist containing a
 * 6-byte MAC address. This functionality is present if the
 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 */
struct virtio_net_ctrl_mac {
	uint32_t entries;
	uint8_t macs[][RTE_ETHER_ADDR_LEN];
} __rte_packed;

#define VIRTIO_NET_CTRL_MAC             1
#define VIRTIO_NET_CTRL_MAC_TABLE_SET   0
#define VIRTIO_NET_CTRL_MAC_ADDR_SET    1

/**
 * Control VLAN filtering
 *
 * The VLAN filter table is controlled via a simple ADD/DEL interface.
 * VLAN IDs not added may be filtered by the hypervisor. Del is the
 * opposite of add. Both commands expect an out entry containing a 2
 * byte VLAN ID. VLAN filtering is available with the
 * VIRTIO_NET_F_CTRL_VLAN feature bit.
 */
#define VIRTIO_NET_CTRL_VLAN            2
#define VIRTIO_NET_CTRL_VLAN_ADD        0
#define VIRTIO_NET_CTRL_VLAN_DEL        1

/*
 * Control link announce acknowledgement
 *
 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
 * the driver has received the notification; the device will clear the
 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 * this command.
 */
#define VIRTIO_NET_CTRL_ANNOUNCE        3
#define VIRTIO_NET_CTRL_ANNOUNCE_ACK    0

struct virtio_net_ctrl_hdr {
	uint8_t class;
	uint8_t cmd;
} __rte_packed;

typedef uint8_t virtio_net_ctrl_ack;

#define VIRTIO_NET_OK   0
#define VIRTIO_NET_ERR  1

#define VIRTIO_MAX_CTRL_DATA 2048

struct virtio_pmd_ctrl {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	uint8_t data[VIRTIO_MAX_CTRL_DATA];
};
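/*
 * Illustrative sketch (assumption about typical usage, not the control-queue
 * send routine itself): enabling promiscuous mode is assumed to boil down to
 * a class/cmd header plus a single 1-byte "enable" payload:
 *
 *	struct virtio_pmd_ctrl ctrl;
 *	uint8_t on = 1;
 *
 *	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
 *	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
 *	memcpy(ctrl.data, &on, sizeof(on));
 *
 * The command is then handed to the control virtqueue; the device writes
 * back VIRTIO_NET_OK or VIRTIO_NET_ERR into the status field.
 */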
struct vq_desc_extra {
	void *cookie;
	uint16_t ndescs;
	uint16_t next;
};

struct virtqueue {
	struct virtio_hw *hw; /**< virtio_hw structure pointer. */
	union {
		struct {
			/**< vring keeping desc, used and avail */
			struct vring ring;
		} vq_split;

		struct {
			/**< vring keeping descs and events */
			struct vring_packed ring;
			bool used_wrap_counter;
			uint16_t cached_flags; /**< cached flags for descs */
			uint16_t event_flags_shadow;
		} vq_packed;
	};

	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
	uint16_t vq_nentries;  /**< vring desc numbers */
	uint16_t vq_free_cnt;  /**< num of desc available */
	uint16_t vq_avail_idx; /**< sync until needed */
	uint16_t vq_free_thresh; /**< free threshold */

	void *vq_ring_virt_mem;  /**< linear address of vring*/
	unsigned int vq_ring_size;

	union {
		struct virtnet_rx rxq;
		struct virtnet_tx txq;
		struct virtnet_ctl cq;
	};

	rte_iova_t vq_ring_mem; /**< physical address of vring,
				 * or virtual address for virtio_user. */

	/**
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t vq_desc_head_idx;
	uint16_t vq_desc_tail_idx;
	uint16_t vq_queue_index;   /**< PCI queue index */
	uint16_t offset; /**< relative offset to obtain addr in mbuf */
	uint16_t *notify_addr;
	struct rte_mbuf **sw_ring;  /**< RX software ring. */
	struct vq_desc_extra vq_descx[0];
};

/* If multiqueue is provided by host, then we support it. */
#define VIRTIO_NET_CTRL_MQ   4
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000

/**
 * This is the first element of the scatter-gather list. If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 */
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
#define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
	uint16_t csum_start;  /**< Position to start checksumming from */
	uint16_t csum_offset; /**< Offset after that to place checksum */
};

/**
 * This is the version of the header to use when the MRG_RXBUF
 * feature has been negotiated.
 */
struct virtio_net_hdr_mrg_rxbuf {
	struct virtio_net_hdr hdr;
	uint16_t num_buffers; /**< Number of merged rx buffers */
};
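/*
 * Illustrative sketch (mirrors what virtqueue_xmit_offload() further below
 * does for a TCP checksum offload request): the device is told where the L4
 * header starts and at which offset inside it to store the checksum.
 *
 *	hdr->csum_start  = m->l2_len + m->l3_len;
 *	hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
 *	hdr->flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 */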
/* Region reserved to allow for transmit header and indirect ring */
#define VIRTIO_MAX_TX_INDIRECT 8
struct virtio_tx_region {
	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
	union {
		struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
		struct vring_packed_desc
			tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
	} __rte_aligned(16);
};

static inline int
desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
{
	uint16_t used, avail, flags;

	flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
	used = !!(flags & VRING_PACKED_DESC_F_USED);
	avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);

	return avail == used && used == vq->vq_packed.used_wrap_counter;
}

static inline void
vring_desc_init_packed(struct virtqueue *vq, int n)
{
	int i;

	for (i = 0; i < n - 1; i++) {
		vq->vq_packed.ring.desc[i].id = i;
		vq->vq_descx[i].next = i + 1;
	}
	vq->vq_packed.ring.desc[i].id = i;
	vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
}

/* Chain all the descriptors in the ring with an END */
static inline void
vring_desc_init_split(struct vring_desc *dp, uint16_t n)
{
	uint16_t i;

	for (i = 0; i < n - 1; i++)
		dp[i].next = (uint16_t)(i + 1);
	dp[i].next = VQ_RING_DESC_CHAIN_END;
}

static inline void
vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		dp[i].id = (uint16_t)i;
		dp[i].flags = VRING_DESC_F_WRITE;
	}
}

/**
 * Tell the backend not to interrupt us. Implementation for packed virtqueues.
 */
static inline void
virtqueue_disable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}

/**
 * Tell the backend not to interrupt us. Implementation for split virtqueues.
 */
static inline void
virtqueue_disable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

/**
 * Tell the backend not to interrupt us.
 */
static inline void
virtqueue_disable_intr(struct virtqueue *vq)
{
	if (vtpci_packed_queue(vq->hw))
		virtqueue_disable_intr_packed(vq);
	else
		virtqueue_disable_intr_split(vq);
}

/**
 * Tell the backend to interrupt us. Implementation for packed virtqueues.
 */
static inline void
virtqueue_enable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}

/**
 * Tell the backend to interrupt us. Implementation for split virtqueues.
 */
static inline void
virtqueue_enable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
}

/**
 * Tell the backend to interrupt us.
 */
static inline void
virtqueue_enable_intr(struct virtqueue *vq)
{
	if (vtpci_packed_queue(vq->hw))
		virtqueue_enable_intr_packed(vq);
	else
		virtqueue_enable_intr_split(vq);
}
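/*
 * Illustrative sketch (assumption about a typical consumer loop): a packed
 * ring is polled by checking desc_is_used() at the current consumed index,
 * toggling the wrap counter each time the index wraps past the ring size,
 * as the cleanup helpers further below do:
 *
 *	while (desc_is_used(&desc[vq->vq_used_cons_idx], vq)) {
 *		// ... process the completed descriptor ...
 *		vq->vq_used_cons_idx += ndescs;
 *		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
 *			vq->vq_used_cons_idx -= vq->vq_nentries;
 *			vq->vq_packed.used_wrap_counter ^= 1;
 *		}
 *	}
 */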
/**
 * Dump virtqueue internal structures, for debug purpose only.
 */
void virtqueue_dump(struct virtqueue *vq);
/**
 * Get all mbufs to be freed.
 */
struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);

/* Flush the elements in the used ring. */
void virtqueue_rxvq_flush(struct virtqueue *vq);

int virtqueue_rxvq_reset_packed(struct virtqueue *vq);

int virtqueue_txvq_reset_packed(struct virtqueue *vq);

static inline int
virtqueue_full(const struct virtqueue *vq)
{
	return vq->vq_free_cnt == 0;
}

static inline int
virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
{
	if (vtpci_queue_idx == hw->max_queue_pairs * 2)
		return VTNET_CQ;
	else if (vtpci_queue_idx % 2 == 0)
		return VTNET_RQ;
	else
		return VTNET_TQ;
}

/* virtqueue_nused has a load-acquire or rte_io_rmb inside */
static inline uint16_t
virtqueue_nused(const struct virtqueue *vq)
{
	uint16_t idx;

	if (vq->hw->weak_barriers) {
		/**
		 * x86 prefers using rte_smp_rmb over __atomic_load_n as it
		 * reports slightly better performance, which comes from the
		 * branch saved by the compiler.
		 * The if and else branches are identical with the smp and io
		 * barriers both defined as compiler barriers on x86.
		 */
#ifdef RTE_ARCH_X86_64
		idx = vq->vq_split.ring.used->idx;
		rte_smp_rmb();
#else
		idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
				__ATOMIC_ACQUIRE);
#endif
	} else {
		idx = vq->vq_split.ring.used->idx;
		rte_io_rmb();
	}
	return idx - vq->vq_used_cons_idx;
}

void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
			  uint16_t num);

static inline void
vq_update_avail_idx(struct virtqueue *vq)
{
	if (vq->hw->weak_barriers) {
		/* x86 prefers using rte_smp_wmb over __atomic_store_n as
		 * it reports slightly better performance, which comes from
		 * the branch saved by the compiler.
		 * The if and else branches are identical with the smp and
		 * io barriers both defined as compiler barriers on x86.
		 */
#ifdef RTE_ARCH_X86_64
		rte_smp_wmb();
		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
#else
		__atomic_store_n(&vq->vq_split.ring.avail->idx,
				 vq->vq_avail_idx, __ATOMIC_RELEASE);
#endif
	} else {
		rte_io_wmb();
		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
	}
}

static inline void
vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;
	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
	if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
		vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
	vq->vq_avail_idx++;
}
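/*
 * Illustrative sketch (assumption about typical usage in the enqueue paths):
 * each descriptor chain head is written into the avail ring first, and the
 * avail index is published once for the whole batch, with the required
 * ordering provided by vq_update_avail_idx():
 *
 *	// for each enqueued chain:
 *	vq_update_avail_ring(vq, head_desc_idx);
 *	// once the batch is complete:
 *	vq_update_avail_idx(vq);
 */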
static inline int
virtqueue_kick_prepare(struct virtqueue *vq)
{
	/*
	 * Ensure updated avail->idx is visible to vhost before reading
	 * the used->flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
}

static inline int
virtqueue_kick_prepare_packed(struct virtqueue *vq)
{
	uint16_t flags;

	/*
	 * Ensure updated data is visible to vhost before reading the flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	flags = vq->vq_packed.ring.device->desc_event_flags;

	return flags != RING_EVENT_FLAGS_DISABLE;
}

/*
 * virtqueue_kick_prepare*() or the virtio_wmb() should be called
 * before this function to be sure that all the data is visible to vhost.
 */
static inline void
virtqueue_notify(struct virtqueue *vq)
{
	VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
}
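/*
 * Illustrative sketch (assumption about typical usage): after publishing the
 * avail index, a kick is sent only when the device has not suppressed
 * notifications:
 *
 *	vq_update_avail_idx(vq);
 *	if (unlikely(virtqueue_kick_prepare(vq)))
 *		virtqueue_notify(vq);
 */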
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTQUEUE_DUMP(vq) do { \
	uint16_t used_idx, nused; \
	used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
				   __ATOMIC_RELAXED); \
	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
	if (vtpci_packed_queue((vq)->hw)) { \
		PMD_INIT_LOG(DEBUG, \
		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
		" cached_flags=0x%x; used_wrap_counter=%d", \
		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
		(vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
		(vq)->vq_packed.used_wrap_counter); \
		break; \
	} \
	PMD_INIT_LOG(DEBUG, \
	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
	  " avail.flags=0x%x; used.flags=0x%x", \
	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
	  (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
	  __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
	  (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
} while (0)
#else
#define VIRTQUEUE_DUMP(vq) do { } while (0)
#endif

/* Avoid the write operation when it is not needed, to lessen cache issues. */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	typeof(var) *const var_ = &(var);	\
	typeof(val)  const val_ = (val);	\
	if (*var_ != val_)			\
		*var_ = val_;			\
} while (0)

#define virtqueue_clear_net_hdr(hdr) do {		\
	typeof(hdr) hdr_ = (hdr);			\
	ASSIGN_UNLESS_EQUAL((hdr_)->csum_start, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->csum_offset, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->flags, 0);		\
	ASSIGN_UNLESS_EQUAL((hdr_)->gso_type, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->gso_size, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->hdr_len, 0);	\
} while (0)

static inline void
virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
			struct rte_mbuf *cookie,
			bool offload)
{
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct rte_udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}
}

static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
			      uint16_t needed, int use_indirect, int can_push,
			      int in_order)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_packed_desc *start_dp, *head_dp;
	uint16_t idx, id, head_idx, head_flags;
	int16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	uint16_t prev;
	bool prepend_header = false;
	uint16_t seg_num = cookie->nb_segs;

	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;

	dxp = &vq->vq_descx[id];
	dxp->ndescs = needed;
	dxp->cookie = cookie;

	head_idx = vq->vq_avail_idx;
	idx = head_idx;
	prev = head_idx;
	start_dp = vq->vq_packed.ring.desc;

	head_dp = &vq->vq_packed.ring.desc[idx];
	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
	head_flags |= vq->vq_packed.cached_flags;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
					      -head_size);
		prepend_header = true;

		/* if offload disabled, it is not zeroed below, do it now */
		if (!vq->hw->has_tx_offload)
			virtqueue_clear_net_hdr(hdr);
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
		start_dp[idx].len = (seg_num + 1) *
			sizeof(struct vring_packed_desc);
		/* reset flags for indirect desc */
		head_flags = VRING_DESC_F_INDIRECT;
		head_flags |= vq->vq_packed.cached_flags;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_packed_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
		idx++;
		if (idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);

	do {
		uint16_t flags;

		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		if (prepend_header) {
			start_dp[idx].addr -= head_size;
			start_dp[idx].len += head_size;
			prepend_header = false;
		}

		if (likely(idx != head_idx)) {
			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
			flags |= vq->vq_packed.cached_flags;
			start_dp[idx].flags = flags;
		}
		prev = idx;
		idx++;
		if (idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	} while ((cookie = cookie->next) != NULL);

	start_dp[prev].id = id;

	if (use_indirect) {
		idx = head_idx;
		if (++idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq->vq_avail_idx = idx;

	if (!in_order) {
		vq->vq_desc_head_idx = dxp->next;
		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
	}

	virtqueue_store_flags_packed(head_dp, head_flags,
				     vq->hw->weak_barriers);
}
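/*
 * Illustrative sketch (assumption about how a transmit burst is expected to
 * drive the enqueue helper above and the cleanup helpers below; the actual
 * burst functions live in virtio_rxtx.c):
 *
 *	if (vq->vq_free_cnt < needed)
 *		virtio_xmit_cleanup_packed(vq, needed, in_order);
 *	virtqueue_enqueue_xmit_packed(txvq, m, needed,
 *				      use_indirect, can_push, in_order);
 *	if (unlikely(virtqueue_kick_prepare_packed(vq)))
 *		virtqueue_notify(vq);
 */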
static void
vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
{
	struct vq_desc_extra *dxp;

	dxp = &vq->vq_descx[id];
	vq->vq_free_cnt += dxp->ndescs;

	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_head_idx = id;
	else
		vq->vq_descx[vq->vq_desc_tail_idx].next = id;

	vq->vq_desc_tail_idx = id;
	dxp->next = VQ_RING_DESC_CHAIN_END;
}

static void
virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
{
	uint16_t used_idx, id, curr_id, free_cnt = 0;
	uint16_t size = vq->vq_nentries;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct vq_desc_extra *dxp;

	used_idx = vq->vq_used_cons_idx;
	/* desc_is_used has a load-acquire or rte_io_rmb inside
	 * and waits for used descriptors in the virtqueue.
	 */
	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
		id = desc[used_idx].id;
		do {
			curr_id = used_idx;
			dxp = &vq->vq_descx[used_idx];
			used_idx += dxp->ndescs;
			free_cnt += dxp->ndescs;
			num -= dxp->ndescs;
			if (used_idx >= size) {
				used_idx -= size;
				vq->vq_packed.used_wrap_counter ^= 1;
			}
			if (dxp->cookie != NULL) {
				rte_pktmbuf_free(dxp->cookie);
				dxp->cookie = NULL;
			}
		} while (curr_id != id);
	}
	vq->vq_used_cons_idx = used_idx;
	vq->vq_free_cnt += free_cnt;
}

static void
virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
{
	uint16_t used_idx, id;
	uint16_t size = vq->vq_nentries;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct vq_desc_extra *dxp;

	used_idx = vq->vq_used_cons_idx;
	/* desc_is_used has a load-acquire or rte_io_rmb inside
	 * and waits for used descriptors in the virtqueue.
	 */
	while (num-- && desc_is_used(&desc[used_idx], vq)) {
		id = desc[used_idx].id;
		dxp = &vq->vq_descx[id];
		vq->vq_used_cons_idx += dxp->ndescs;
		if (vq->vq_used_cons_idx >= size) {
			vq->vq_used_cons_idx -= size;
			vq->vq_packed.used_wrap_counter ^= 1;
		}
		vq_ring_free_id_packed(vq, id);
		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
		used_idx = vq->vq_used_cons_idx;
	}
}

/* Cleanup from completed transmits. */
static inline void
virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
{
	if (in_order)
		virtio_xmit_cleanup_inorder_packed(vq, num);
	else
		virtio_xmit_cleanup_normal_packed(vq, num);
}

static inline void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx &
				(vq->vq_nentries - 1));
		uep = &vq->vq_split.ring.used->ring[used_idx];

		desc_idx = (uint16_t)uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

/* Cleanup from completed inorder transmits. */
static __rte_always_inline void
virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, idx = vq->vq_used_cons_idx;
	int16_t free_cnt = 0;
	struct vq_desc_extra *dxp = NULL;

	if (unlikely(num == 0))
		return;

	for (i = 0; i < num; i++) {
		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
		free_cnt += dxp->ndescs;
		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}

	vq->vq_free_cnt += free_cnt;
	vq->vq_used_cons_idx = idx;
}
#endif /* _VIRTQUEUE_H_ */