1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #ifndef _VIRTQUEUE_H_ 6 #define _VIRTQUEUE_H_ 7 8 #include <stdint.h> 9 10 #include <rte_atomic.h> 11 #include <rte_memory.h> 12 #include <rte_mempool.h> 13 14 #include "virtio_pci.h" 15 #include "virtio_ring.h" 16 #include "virtio_logs.h" 17 #include "virtio_rxtx.h" 18 19 struct rte_mbuf; 20 21 /* 22 * Per virtio_ring.h in Linux. 23 * For virtio_pci on SMP, we don't need to order with respect to MMIO 24 * accesses through relaxed memory I/O windows, so smp_mb() et al are 25 * sufficient. 26 * 27 * For using virtio to talk to real devices (eg. vDPA) we do need real 28 * barriers. 29 */ 30 static inline void 31 virtio_mb(uint8_t weak_barriers) 32 { 33 if (weak_barriers) 34 rte_smp_mb(); 35 else 36 rte_mb(); 37 } 38 39 static inline void 40 virtio_rmb(uint8_t weak_barriers) 41 { 42 if (weak_barriers) 43 rte_smp_rmb(); 44 else 45 rte_cio_rmb(); 46 } 47 48 static inline void 49 virtio_wmb(uint8_t weak_barriers) 50 { 51 if (weak_barriers) 52 rte_smp_wmb(); 53 else 54 rte_cio_wmb(); 55 } 56 57 static inline uint16_t 58 virtqueue_fetch_flags_packed(struct vring_packed_desc *dp, 59 uint8_t weak_barriers) 60 { 61 uint16_t flags; 62 63 if (weak_barriers) { 64 /* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports 65 * a better perf(~1.5%), which comes from the saved branch by the compiler. 66 * The if and else branch are identical with the smp and cio barriers both 67 * defined as compiler barriers on x86. 68 */ 69 #ifdef RTE_ARCH_X86_64 70 flags = dp->flags; 71 rte_smp_rmb(); 72 #else 73 flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE); 74 #endif 75 } else { 76 flags = dp->flags; 77 rte_cio_rmb(); 78 } 79 80 return flags; 81 } 82 83 static inline void 84 virtqueue_store_flags_packed(struct vring_packed_desc *dp, 85 uint16_t flags, uint8_t weak_barriers) 86 { 87 if (weak_barriers) { 88 /* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports 89 * a better perf(~1.5%), which comes from the saved branch by the compiler. 90 * The if and else branch are identical with the smp and cio barriers both 91 * defined as compiler barriers on x86. 92 */ 93 #ifdef RTE_ARCH_X86_64 94 rte_smp_wmb(); 95 dp->flags = flags; 96 #else 97 __atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE); 98 #endif 99 } else { 100 rte_cio_wmb(); 101 dp->flags = flags; 102 } 103 } 104 #ifdef RTE_PMD_PACKET_PREFETCH 105 #define rte_packet_prefetch(p) rte_prefetch1(p) 106 #else 107 #define rte_packet_prefetch(p) do {} while(0) 108 #endif 109 110 #define VIRTQUEUE_MAX_NAME_SZ 32 111 112 #ifdef RTE_VIRTIO_USER 113 /** 114 * Return the physical address (or virtual address in case of 115 * virtio-user) of mbuf data buffer. 116 * 117 * The address is firstly casted to the word size (sizeof(uintptr_t)) 118 * before casting it to uint64_t. This is to make it work with different 119 * combination of word size (64 bit and 32 bit) and virtio device 120 * (virtio-pci and virtio-user). 121 */ 122 #define VIRTIO_MBUF_ADDR(mb, vq) \ 123 ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset))) 124 #else 125 #define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova) 126 #endif 127 128 /** 129 * Return the physical address (or virtual address in case of 130 * virtio-user) of mbuf data buffer, taking care of mbuf data offset 131 */ 132 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \ 133 (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off) 134 135 #define VTNET_SQ_RQ_QUEUE_IDX 0 136 #define VTNET_SQ_TQ_QUEUE_IDX 1 137 #define VTNET_SQ_CQ_QUEUE_IDX 2 138 139 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; 140 /** 141 * The maximum virtqueue size is 2^15. Use that value as the end of 142 * descriptor chain terminator since it will never be a valid index 143 * in the descriptor table. This is used to verify we are correctly 144 * handling vq_free_cnt. 145 */ 146 #define VQ_RING_DESC_CHAIN_END 32768 147 148 /** 149 * Control the RX mode, ie. promiscuous, allmulti, etc... 150 * All commands require an "out" sg entry containing a 1 byte 151 * state value, zero = disable, non-zero = enable. Commands 152 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. 153 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. 154 */ 155 #define VIRTIO_NET_CTRL_RX 0 156 #define VIRTIO_NET_CTRL_RX_PROMISC 0 157 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 158 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 159 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 160 #define VIRTIO_NET_CTRL_RX_NOUNI 4 161 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 162 163 /** 164 * Control the MAC 165 * 166 * The MAC filter table is managed by the hypervisor, the guest should 167 * assume the size is infinite. Filtering should be considered 168 * non-perfect, ie. based on hypervisor resources, the guest may 169 * received packets from sources not specified in the filter list. 170 * 171 * In addition to the class/cmd header, the TABLE_SET command requires 172 * two out scatterlists. Each contains a 4 byte count of entries followed 173 * by a concatenated byte stream of the ETH_ALEN MAC addresses. The 174 * first sg list contains unicast addresses, the second is for multicast. 175 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature 176 * is available. 177 * 178 * The ADDR_SET command requests one out scatterlist, it contains a 179 * 6 bytes MAC address. This functionality is present if the 180 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. 181 */ 182 struct virtio_net_ctrl_mac { 183 uint32_t entries; 184 uint8_t macs[][RTE_ETHER_ADDR_LEN]; 185 } __attribute__((__packed__)); 186 187 #define VIRTIO_NET_CTRL_MAC 1 188 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 189 #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 190 191 /** 192 * Control VLAN filtering 193 * 194 * The VLAN filter table is controlled via a simple ADD/DEL interface. 195 * VLAN IDs not added may be filtered by the hypervisor. Del is the 196 * opposite of add. Both commands expect an out entry containing a 2 197 * byte VLAN ID. VLAN filtering is available with the 198 * VIRTIO_NET_F_CTRL_VLAN feature bit. 199 */ 200 #define VIRTIO_NET_CTRL_VLAN 2 201 #define VIRTIO_NET_CTRL_VLAN_ADD 0 202 #define VIRTIO_NET_CTRL_VLAN_DEL 1 203 204 /* 205 * Control link announce acknowledgement 206 * 207 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that 208 * driver has recevied the notification; device would clear the 209 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives 210 * this command. 211 */ 212 #define VIRTIO_NET_CTRL_ANNOUNCE 3 213 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 214 215 struct virtio_net_ctrl_hdr { 216 uint8_t class; 217 uint8_t cmd; 218 } __attribute__((packed)); 219 220 typedef uint8_t virtio_net_ctrl_ack; 221 222 #define VIRTIO_NET_OK 0 223 #define VIRTIO_NET_ERR 1 224 225 #define VIRTIO_MAX_CTRL_DATA 2048 226 227 struct virtio_pmd_ctrl { 228 struct virtio_net_ctrl_hdr hdr; 229 virtio_net_ctrl_ack status; 230 uint8_t data[VIRTIO_MAX_CTRL_DATA]; 231 }; 232 233 struct vq_desc_extra { 234 void *cookie; 235 uint16_t ndescs; 236 uint16_t next; 237 }; 238 239 struct virtqueue { 240 struct virtio_hw *hw; /**< virtio_hw structure pointer. */ 241 union { 242 struct { 243 /**< vring keeping desc, used and avail */ 244 struct vring ring; 245 } vq_split; 246 247 struct { 248 /**< vring keeping descs and events */ 249 struct vring_packed ring; 250 bool used_wrap_counter; 251 uint16_t cached_flags; /**< cached flags for descs */ 252 uint16_t event_flags_shadow; 253 } vq_packed; 254 }; 255 256 uint16_t vq_used_cons_idx; /**< last consumed descriptor */ 257 uint16_t vq_nentries; /**< vring desc numbers */ 258 uint16_t vq_free_cnt; /**< num of desc available */ 259 uint16_t vq_avail_idx; /**< sync until needed */ 260 uint16_t vq_free_thresh; /**< free threshold */ 261 262 void *vq_ring_virt_mem; /**< linear address of vring*/ 263 unsigned int vq_ring_size; 264 265 union { 266 struct virtnet_rx rxq; 267 struct virtnet_tx txq; 268 struct virtnet_ctl cq; 269 }; 270 271 rte_iova_t vq_ring_mem; /**< physical address of vring, 272 * or virtual address for virtio_user. */ 273 274 /** 275 * Head of the free chain in the descriptor table. If 276 * there are no free descriptors, this will be set to 277 * VQ_RING_DESC_CHAIN_END. 278 */ 279 uint16_t vq_desc_head_idx; 280 uint16_t vq_desc_tail_idx; 281 uint16_t vq_queue_index; /**< PCI queue index */ 282 uint16_t offset; /**< relative offset to obtain addr in mbuf */ 283 uint16_t *notify_addr; 284 struct rte_mbuf **sw_ring; /**< RX software ring. */ 285 struct vq_desc_extra vq_descx[0]; 286 }; 287 288 /* If multiqueue is provided by host, then we suppport it. */ 289 #define VIRTIO_NET_CTRL_MQ 4 290 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 291 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 292 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 293 294 /** 295 * This is the first element of the scatter-gather list. If you don't 296 * specify GSO or CSUM features, you can simply ignore the header. 297 */ 298 struct virtio_net_hdr { 299 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ 300 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /**< Checksum is valid */ 301 uint8_t flags; 302 #define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ 303 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ 304 #define VIRTIO_NET_HDR_GSO_UDP 3 /**< GSO frame, IPv4 UDP (UFO) */ 305 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /**< GSO frame, IPv6 TCP */ 306 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /**< TCP has ECN set */ 307 uint8_t gso_type; 308 uint16_t hdr_len; /**< Ethernet + IP + tcp/udp hdrs */ 309 uint16_t gso_size; /**< Bytes to append to hdr_len per frame */ 310 uint16_t csum_start; /**< Position to start checksumming from */ 311 uint16_t csum_offset; /**< Offset after that to place checksum */ 312 }; 313 314 /** 315 * This is the version of the header to use when the MRG_RXBUF 316 * feature has been negotiated. 317 */ 318 struct virtio_net_hdr_mrg_rxbuf { 319 struct virtio_net_hdr hdr; 320 uint16_t num_buffers; /**< Number of merged rx buffers */ 321 }; 322 323 /* Region reserved to allow for transmit header and indirect ring */ 324 #define VIRTIO_MAX_TX_INDIRECT 8 325 struct virtio_tx_region { 326 struct virtio_net_hdr_mrg_rxbuf tx_hdr; 327 struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] 328 __attribute__((__aligned__(16))); 329 }; 330 331 static inline int 332 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq) 333 { 334 uint16_t used, avail, flags; 335 336 flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers); 337 used = !!(flags & VRING_PACKED_DESC_F_USED); 338 avail = !!(flags & VRING_PACKED_DESC_F_AVAIL); 339 340 return avail == used && used == vq->vq_packed.used_wrap_counter; 341 } 342 343 static inline void 344 vring_desc_init_packed(struct virtqueue *vq, int n) 345 { 346 int i; 347 for (i = 0; i < n - 1; i++) { 348 vq->vq_packed.ring.desc[i].id = i; 349 vq->vq_descx[i].next = i + 1; 350 } 351 vq->vq_packed.ring.desc[i].id = i; 352 vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END; 353 } 354 355 /* Chain all the descriptors in the ring with an END */ 356 static inline void 357 vring_desc_init_split(struct vring_desc *dp, uint16_t n) 358 { 359 uint16_t i; 360 361 for (i = 0; i < n - 1; i++) 362 dp[i].next = (uint16_t)(i + 1); 363 dp[i].next = VQ_RING_DESC_CHAIN_END; 364 } 365 366 /** 367 * Tell the backend not to interrupt us. Implementation for packed virtqueues. 368 */ 369 static inline void 370 virtqueue_disable_intr_packed(struct virtqueue *vq) 371 { 372 if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) { 373 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE; 374 vq->vq_packed.ring.driver->desc_event_flags = 375 vq->vq_packed.event_flags_shadow; 376 } 377 } 378 379 /** 380 * Tell the backend not to interrupt us. Implementation for split virtqueues. 381 */ 382 static inline void 383 virtqueue_disable_intr_split(struct virtqueue *vq) 384 { 385 vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 386 } 387 388 /** 389 * Tell the backend not to interrupt us. 390 */ 391 static inline void 392 virtqueue_disable_intr(struct virtqueue *vq) 393 { 394 if (vtpci_packed_queue(vq->hw)) 395 virtqueue_disable_intr_packed(vq); 396 else 397 virtqueue_disable_intr_split(vq); 398 } 399 400 /** 401 * Tell the backend to interrupt. Implementation for packed virtqueues. 402 */ 403 static inline void 404 virtqueue_enable_intr_packed(struct virtqueue *vq) 405 { 406 if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) { 407 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE; 408 vq->vq_packed.ring.driver->desc_event_flags = 409 vq->vq_packed.event_flags_shadow; 410 } 411 } 412 413 /** 414 * Tell the backend to interrupt. Implementation for split virtqueues. 415 */ 416 static inline void 417 virtqueue_enable_intr_split(struct virtqueue *vq) 418 { 419 vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); 420 } 421 422 /** 423 * Tell the backend to interrupt us. 424 */ 425 static inline void 426 virtqueue_enable_intr(struct virtqueue *vq) 427 { 428 if (vtpci_packed_queue(vq->hw)) 429 virtqueue_enable_intr_packed(vq); 430 else 431 virtqueue_enable_intr_split(vq); 432 } 433 434 /** 435 * Dump virtqueue internal structures, for debug purpose only. 436 */ 437 void virtqueue_dump(struct virtqueue *vq); 438 /** 439 * Get all mbufs to be freed. 440 */ 441 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq); 442 443 /* Flush the elements in the used ring. */ 444 void virtqueue_rxvq_flush(struct virtqueue *vq); 445 446 static inline int 447 virtqueue_full(const struct virtqueue *vq) 448 { 449 return vq->vq_free_cnt == 0; 450 } 451 452 static inline int 453 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx) 454 { 455 if (vtpci_queue_idx == hw->max_queue_pairs * 2) 456 return VTNET_CQ; 457 else if (vtpci_queue_idx % 2 == 0) 458 return VTNET_RQ; 459 else 460 return VTNET_TQ; 461 } 462 463 #define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_split.ring.used->idx - \ 464 (vq)->vq_used_cons_idx)) 465 466 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx); 467 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx); 468 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, 469 uint16_t num); 470 471 static inline void 472 vq_update_avail_idx(struct virtqueue *vq) 473 { 474 virtio_wmb(vq->hw->weak_barriers); 475 vq->vq_split.ring.avail->idx = vq->vq_avail_idx; 476 } 477 478 static inline void 479 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) 480 { 481 uint16_t avail_idx; 482 /* 483 * Place the head of the descriptor chain into the next slot and make 484 * it usable to the host. The chain is made available now rather than 485 * deferring to virtqueue_notify() in the hopes that if the host is 486 * currently running on another CPU, we can keep it processing the new 487 * descriptor. 488 */ 489 avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); 490 if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx)) 491 vq->vq_split.ring.avail->ring[avail_idx] = desc_idx; 492 vq->vq_avail_idx++; 493 } 494 495 static inline int 496 virtqueue_kick_prepare(struct virtqueue *vq) 497 { 498 /* 499 * Ensure updated avail->idx is visible to vhost before reading 500 * the used->flags. 501 */ 502 virtio_mb(vq->hw->weak_barriers); 503 return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY); 504 } 505 506 static inline int 507 virtqueue_kick_prepare_packed(struct virtqueue *vq) 508 { 509 uint16_t flags; 510 511 /* 512 * Ensure updated data is visible to vhost before reading the flags. 513 */ 514 virtio_mb(vq->hw->weak_barriers); 515 flags = vq->vq_packed.ring.device->desc_event_flags; 516 517 return flags != RING_EVENT_FLAGS_DISABLE; 518 } 519 520 /* 521 * virtqueue_kick_prepare*() or the virtio_wmb() should be called 522 * before this function to be sure that all the data is visible to vhost. 523 */ 524 static inline void 525 virtqueue_notify(struct virtqueue *vq) 526 { 527 VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); 528 } 529 530 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 531 #define VIRTQUEUE_DUMP(vq) do { \ 532 uint16_t used_idx, nused; \ 533 used_idx = (vq)->vq_split.ring.used->idx; \ 534 nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ 535 if (vtpci_packed_queue((vq)->hw)) { \ 536 PMD_INIT_LOG(DEBUG, \ 537 "VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \ 538 " cached_flags=0x%x; used_wrap_counter=%d", \ 539 (vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \ 540 (vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \ 541 (vq)->vq_packed.used_wrap_counter); \ 542 break; \ 543 } \ 544 PMD_INIT_LOG(DEBUG, \ 545 "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ 546 " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ 547 " avail.flags=0x%x; used.flags=0x%x", \ 548 (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ 549 (vq)->vq_desc_head_idx, (vq)->vq_split.ring.avail->idx, \ 550 (vq)->vq_used_cons_idx, (vq)->vq_split.ring.used->idx, \ 551 (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \ 552 } while (0) 553 #else 554 #define VIRTQUEUE_DUMP(vq) do { } while (0) 555 #endif 556 557 #endif /* _VIRTQUEUE_H_ */ 558