1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #ifndef _VIRTQUEUE_H_ 6 #define _VIRTQUEUE_H_ 7 8 #include <stdint.h> 9 10 #include <rte_atomic.h> 11 #include <rte_memory.h> 12 #include <rte_mempool.h> 13 14 #include "virtio_pci.h" 15 #include "virtio_ring.h" 16 #include "virtio_logs.h" 17 #include "virtio_rxtx.h" 18 19 struct rte_mbuf; 20 21 /* 22 * Per virtio_ring.h in Linux. 23 * For virtio_pci on SMP, we don't need to order with respect to MMIO 24 * accesses through relaxed memory I/O windows, so smp_mb() et al are 25 * sufficient. 26 * 27 * For using virtio to talk to real devices (eg. vDPA) we do need real 28 * barriers. 29 */ 30 static inline void 31 virtio_mb(uint8_t weak_barriers) 32 { 33 if (weak_barriers) 34 rte_smp_mb(); 35 else 36 rte_mb(); 37 } 38 39 static inline void 40 virtio_rmb(uint8_t weak_barriers) 41 { 42 if (weak_barriers) 43 rte_smp_rmb(); 44 else 45 rte_cio_rmb(); 46 } 47 48 static inline void 49 virtio_wmb(uint8_t weak_barriers) 50 { 51 if (weak_barriers) 52 rte_smp_wmb(); 53 else 54 rte_cio_wmb(); 55 } 56 57 #ifdef RTE_PMD_PACKET_PREFETCH 58 #define rte_packet_prefetch(p) rte_prefetch1(p) 59 #else 60 #define rte_packet_prefetch(p) do {} while(0) 61 #endif 62 63 #define VIRTQUEUE_MAX_NAME_SZ 32 64 65 #ifdef RTE_VIRTIO_USER 66 /** 67 * Return the physical address (or virtual address in case of 68 * virtio-user) of mbuf data buffer. 69 * 70 * The address is firstly casted to the word size (sizeof(uintptr_t)) 71 * before casting it to uint64_t. This is to make it work with different 72 * combination of word size (64 bit and 32 bit) and virtio device 73 * (virtio-pci and virtio-user). 74 */ 75 #define VIRTIO_MBUF_ADDR(mb, vq) \ 76 ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset))) 77 #else 78 #define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova) 79 #endif 80 81 /** 82 * Return the physical address (or virtual address in case of 83 * virtio-user) of mbuf data buffer, taking care of mbuf data offset 84 */ 85 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \ 86 (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off) 87 88 #define VTNET_SQ_RQ_QUEUE_IDX 0 89 #define VTNET_SQ_TQ_QUEUE_IDX 1 90 #define VTNET_SQ_CQ_QUEUE_IDX 2 91 92 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; 93 /** 94 * The maximum virtqueue size is 2^15. Use that value as the end of 95 * descriptor chain terminator since it will never be a valid index 96 * in the descriptor table. This is used to verify we are correctly 97 * handling vq_free_cnt. 98 */ 99 #define VQ_RING_DESC_CHAIN_END 32768 100 101 /** 102 * Control the RX mode, ie. promiscuous, allmulti, etc... 103 * All commands require an "out" sg entry containing a 1 byte 104 * state value, zero = disable, non-zero = enable. Commands 105 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. 106 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. 107 */ 108 #define VIRTIO_NET_CTRL_RX 0 109 #define VIRTIO_NET_CTRL_RX_PROMISC 0 110 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 111 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 112 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 113 #define VIRTIO_NET_CTRL_RX_NOUNI 4 114 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 115 116 /** 117 * Control the MAC 118 * 119 * The MAC filter table is managed by the hypervisor, the guest should 120 * assume the size is infinite. Filtering should be considered 121 * non-perfect, ie. based on hypervisor resources, the guest may 122 * received packets from sources not specified in the filter list. 123 * 124 * In addition to the class/cmd header, the TABLE_SET command requires 125 * two out scatterlists. Each contains a 4 byte count of entries followed 126 * by a concatenated byte stream of the ETH_ALEN MAC addresses. The 127 * first sg list contains unicast addresses, the second is for multicast. 128 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature 129 * is available. 130 * 131 * The ADDR_SET command requests one out scatterlist, it contains a 132 * 6 bytes MAC address. This functionality is present if the 133 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. 134 */ 135 struct virtio_net_ctrl_mac { 136 uint32_t entries; 137 uint8_t macs[][ETHER_ADDR_LEN]; 138 } __attribute__((__packed__)); 139 140 #define VIRTIO_NET_CTRL_MAC 1 141 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 142 #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 143 144 /** 145 * Control VLAN filtering 146 * 147 * The VLAN filter table is controlled via a simple ADD/DEL interface. 148 * VLAN IDs not added may be filtered by the hypervisor. Del is the 149 * opposite of add. Both commands expect an out entry containing a 2 150 * byte VLAN ID. VLAN filtering is available with the 151 * VIRTIO_NET_F_CTRL_VLAN feature bit. 152 */ 153 #define VIRTIO_NET_CTRL_VLAN 2 154 #define VIRTIO_NET_CTRL_VLAN_ADD 0 155 #define VIRTIO_NET_CTRL_VLAN_DEL 1 156 157 /* 158 * Control link announce acknowledgement 159 * 160 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that 161 * driver has recevied the notification; device would clear the 162 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives 163 * this command. 164 */ 165 #define VIRTIO_NET_CTRL_ANNOUNCE 3 166 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 167 168 struct virtio_net_ctrl_hdr { 169 uint8_t class; 170 uint8_t cmd; 171 } __attribute__((packed)); 172 173 typedef uint8_t virtio_net_ctrl_ack; 174 175 #define VIRTIO_NET_OK 0 176 #define VIRTIO_NET_ERR 1 177 178 #define VIRTIO_MAX_CTRL_DATA 2048 179 180 struct virtio_pmd_ctrl { 181 struct virtio_net_ctrl_hdr hdr; 182 virtio_net_ctrl_ack status; 183 uint8_t data[VIRTIO_MAX_CTRL_DATA]; 184 }; 185 186 struct vq_desc_extra { 187 void *cookie; 188 uint16_t ndescs; 189 uint16_t next; 190 }; 191 192 struct virtqueue { 193 struct virtio_hw *hw; /**< virtio_hw structure pointer. */ 194 union { 195 struct { 196 /**< vring keeping desc, used and avail */ 197 struct vring ring; 198 } vq_split; 199 200 struct { 201 /**< vring keeping descs and events */ 202 struct vring_packed ring; 203 bool used_wrap_counter; 204 uint16_t cached_flags; /**< cached flags for descs */ 205 uint16_t event_flags_shadow; 206 } vq_packed; 207 }; 208 209 uint16_t vq_used_cons_idx; /**< last consumed descriptor */ 210 uint16_t vq_nentries; /**< vring desc numbers */ 211 uint16_t vq_free_cnt; /**< num of desc available */ 212 uint16_t vq_avail_idx; /**< sync until needed */ 213 uint16_t vq_free_thresh; /**< free threshold */ 214 215 void *vq_ring_virt_mem; /**< linear address of vring*/ 216 unsigned int vq_ring_size; 217 218 union { 219 struct virtnet_rx rxq; 220 struct virtnet_tx txq; 221 struct virtnet_ctl cq; 222 }; 223 224 rte_iova_t vq_ring_mem; /**< physical address of vring, 225 * or virtual address for virtio_user. */ 226 227 /** 228 * Head of the free chain in the descriptor table. If 229 * there are no free descriptors, this will be set to 230 * VQ_RING_DESC_CHAIN_END. 231 */ 232 uint16_t vq_desc_head_idx; 233 uint16_t vq_desc_tail_idx; 234 uint16_t vq_queue_index; /**< PCI queue index */ 235 uint16_t offset; /**< relative offset to obtain addr in mbuf */ 236 uint16_t *notify_addr; 237 struct rte_mbuf **sw_ring; /**< RX software ring. */ 238 struct vq_desc_extra vq_descx[0]; 239 }; 240 241 /* If multiqueue is provided by host, then we suppport it. */ 242 #define VIRTIO_NET_CTRL_MQ 4 243 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 244 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 245 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 246 247 /** 248 * This is the first element of the scatter-gather list. If you don't 249 * specify GSO or CSUM features, you can simply ignore the header. 250 */ 251 struct virtio_net_hdr { 252 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ 253 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /**< Checksum is valid */ 254 uint8_t flags; 255 #define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ 256 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ 257 #define VIRTIO_NET_HDR_GSO_UDP 3 /**< GSO frame, IPv4 UDP (UFO) */ 258 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /**< GSO frame, IPv6 TCP */ 259 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /**< TCP has ECN set */ 260 uint8_t gso_type; 261 uint16_t hdr_len; /**< Ethernet + IP + tcp/udp hdrs */ 262 uint16_t gso_size; /**< Bytes to append to hdr_len per frame */ 263 uint16_t csum_start; /**< Position to start checksumming from */ 264 uint16_t csum_offset; /**< Offset after that to place checksum */ 265 }; 266 267 /** 268 * This is the version of the header to use when the MRG_RXBUF 269 * feature has been negotiated. 270 */ 271 struct virtio_net_hdr_mrg_rxbuf { 272 struct virtio_net_hdr hdr; 273 uint16_t num_buffers; /**< Number of merged rx buffers */ 274 }; 275 276 /* Region reserved to allow for transmit header and indirect ring */ 277 #define VIRTIO_MAX_TX_INDIRECT 8 278 struct virtio_tx_region { 279 struct virtio_net_hdr_mrg_rxbuf tx_hdr; 280 struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] 281 __attribute__((__aligned__(16))); 282 }; 283 284 static inline int 285 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq) 286 { 287 uint16_t used, avail, flags; 288 289 flags = desc->flags; 290 used = !!(flags & VRING_PACKED_DESC_F_USED); 291 avail = !!(flags & VRING_PACKED_DESC_F_AVAIL); 292 293 return avail == used && used == vq->vq_packed.used_wrap_counter; 294 } 295 296 static inline void 297 vring_desc_init_packed(struct virtqueue *vq, int n) 298 { 299 int i; 300 for (i = 0; i < n - 1; i++) { 301 vq->vq_packed.ring.desc[i].id = i; 302 vq->vq_descx[i].next = i + 1; 303 } 304 vq->vq_packed.ring.desc[i].id = i; 305 vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END; 306 } 307 308 /* Chain all the descriptors in the ring with an END */ 309 static inline void 310 vring_desc_init_split(struct vring_desc *dp, uint16_t n) 311 { 312 uint16_t i; 313 314 for (i = 0; i < n - 1; i++) 315 dp[i].next = (uint16_t)(i + 1); 316 dp[i].next = VQ_RING_DESC_CHAIN_END; 317 } 318 319 /** 320 * Tell the backend not to interrupt us. Implementation for packed virtqueues. 321 */ 322 static inline void 323 virtqueue_disable_intr_packed(struct virtqueue *vq) 324 { 325 if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) { 326 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE; 327 vq->vq_packed.ring.driver->desc_event_flags = 328 vq->vq_packed.event_flags_shadow; 329 } 330 } 331 332 /** 333 * Tell the backend not to interrupt us. Implementation for split virtqueues. 334 */ 335 static inline void 336 virtqueue_disable_intr_split(struct virtqueue *vq) 337 { 338 vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 339 } 340 341 /** 342 * Tell the backend not to interrupt us. 343 */ 344 static inline void 345 virtqueue_disable_intr(struct virtqueue *vq) 346 { 347 if (vtpci_packed_queue(vq->hw)) 348 virtqueue_disable_intr_packed(vq); 349 else 350 virtqueue_disable_intr_split(vq); 351 } 352 353 /** 354 * Tell the backend to interrupt. Implementation for packed virtqueues. 355 */ 356 static inline void 357 virtqueue_enable_intr_packed(struct virtqueue *vq) 358 { 359 if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) { 360 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE; 361 vq->vq_packed.ring.driver->desc_event_flags = 362 vq->vq_packed.event_flags_shadow; 363 } 364 } 365 366 /** 367 * Tell the backend to interrupt. Implementation for split virtqueues. 368 */ 369 static inline void 370 virtqueue_enable_intr_split(struct virtqueue *vq) 371 { 372 vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); 373 } 374 375 /** 376 * Tell the backend to interrupt us. 377 */ 378 static inline void 379 virtqueue_enable_intr(struct virtqueue *vq) 380 { 381 if (vtpci_packed_queue(vq->hw)) 382 virtqueue_enable_intr_packed(vq); 383 else 384 virtqueue_enable_intr_split(vq); 385 } 386 387 /** 388 * Dump virtqueue internal structures, for debug purpose only. 389 */ 390 void virtqueue_dump(struct virtqueue *vq); 391 /** 392 * Get all mbufs to be freed. 393 */ 394 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq); 395 396 /* Flush the elements in the used ring. */ 397 void virtqueue_rxvq_flush(struct virtqueue *vq); 398 399 static inline int 400 virtqueue_full(const struct virtqueue *vq) 401 { 402 return vq->vq_free_cnt == 0; 403 } 404 405 static inline int 406 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx) 407 { 408 if (vtpci_queue_idx == hw->max_queue_pairs * 2) 409 return VTNET_CQ; 410 else if (vtpci_queue_idx % 2 == 0) 411 return VTNET_RQ; 412 else 413 return VTNET_TQ; 414 } 415 416 #define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_split.ring.used->idx - \ 417 (vq)->vq_used_cons_idx)) 418 419 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx); 420 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx); 421 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, 422 uint16_t num); 423 424 static inline void 425 vq_update_avail_idx(struct virtqueue *vq) 426 { 427 virtio_wmb(vq->hw->weak_barriers); 428 vq->vq_split.ring.avail->idx = vq->vq_avail_idx; 429 } 430 431 static inline void 432 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) 433 { 434 uint16_t avail_idx; 435 /* 436 * Place the head of the descriptor chain into the next slot and make 437 * it usable to the host. The chain is made available now rather than 438 * deferring to virtqueue_notify() in the hopes that if the host is 439 * currently running on another CPU, we can keep it processing the new 440 * descriptor. 441 */ 442 avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); 443 if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx)) 444 vq->vq_split.ring.avail->ring[avail_idx] = desc_idx; 445 vq->vq_avail_idx++; 446 } 447 448 static inline int 449 virtqueue_kick_prepare(struct virtqueue *vq) 450 { 451 /* 452 * Ensure updated avail->idx is visible to vhost before reading 453 * the used->flags. 454 */ 455 virtio_mb(vq->hw->weak_barriers); 456 return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY); 457 } 458 459 static inline int 460 virtqueue_kick_prepare_packed(struct virtqueue *vq) 461 { 462 uint16_t flags; 463 464 /* 465 * Ensure updated data is visible to vhost before reading the flags. 466 */ 467 virtio_mb(vq->hw->weak_barriers); 468 flags = vq->vq_packed.ring.device->desc_event_flags; 469 470 return flags != RING_EVENT_FLAGS_DISABLE; 471 } 472 473 /* 474 * virtqueue_kick_prepare*() or the virtio_wmb() should be called 475 * before this function to be sure that all the data is visible to vhost. 476 */ 477 static inline void 478 virtqueue_notify(struct virtqueue *vq) 479 { 480 VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); 481 } 482 483 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 484 #define VIRTQUEUE_DUMP(vq) do { \ 485 uint16_t used_idx, nused; \ 486 used_idx = (vq)->vq_split.ring.used->idx; \ 487 nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ 488 if (vtpci_packed_queue((vq)->hw)) { \ 489 PMD_INIT_LOG(DEBUG, \ 490 "VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \ 491 " cached_flags=0x%x; used_wrap_counter=%d", \ 492 (vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \ 493 (vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \ 494 (vq)->vq_packed.used_wrap_counter); \ 495 break; \ 496 } \ 497 PMD_INIT_LOG(DEBUG, \ 498 "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ 499 " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ 500 " avail.flags=0x%x; used.flags=0x%x", \ 501 (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ 502 (vq)->vq_desc_head_idx, (vq)->vq_split.ring.avail->idx, \ 503 (vq)->vq_used_cons_idx, (vq)->vq_split.ring.used->idx, \ 504 (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \ 505 } while (0) 506 #else 507 #define VIRTQUEUE_DUMP(vq) do { } while (0) 508 #endif 509 510 #endif /* _VIRTQUEUE_H_ */ 511