1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #ifndef _VIRTQUEUE_H_ 6 #define _VIRTQUEUE_H_ 7 8 #include <stdint.h> 9 10 #include <rte_atomic.h> 11 #include <rte_memory.h> 12 #include <rte_mempool.h> 13 14 #include "virtio_pci.h" 15 #include "virtio_ring.h" 16 #include "virtio_logs.h" 17 #include "virtio_rxtx.h" 18 19 struct rte_mbuf; 20 21 /* 22 * Per virtio_ring.h in Linux. 23 * For virtio_pci on SMP, we don't need to order with respect to MMIO 24 * accesses through relaxed memory I/O windows, so smp_mb() et al are 25 * sufficient. 26 * 27 * For using virtio to talk to real devices (eg. vDPA) we do need real 28 * barriers. 29 */ 30 static inline void 31 virtio_mb(uint8_t weak_barriers) 32 { 33 if (weak_barriers) 34 rte_smp_mb(); 35 else 36 rte_mb(); 37 } 38 39 static inline void 40 virtio_rmb(uint8_t weak_barriers) 41 { 42 if (weak_barriers) 43 rte_smp_rmb(); 44 else 45 rte_cio_rmb(); 46 } 47 48 static inline void 49 virtio_wmb(uint8_t weak_barriers) 50 { 51 if (weak_barriers) 52 rte_smp_wmb(); 53 else 54 rte_cio_wmb(); 55 } 56 57 #ifdef RTE_PMD_PACKET_PREFETCH 58 #define rte_packet_prefetch(p) rte_prefetch1(p) 59 #else 60 #define rte_packet_prefetch(p) do {} while(0) 61 #endif 62 63 #define VIRTQUEUE_MAX_NAME_SZ 32 64 65 #ifdef RTE_VIRTIO_USER 66 /** 67 * Return the physical address (or virtual address in case of 68 * virtio-user) of mbuf data buffer. 69 * 70 * The address is firstly casted to the word size (sizeof(uintptr_t)) 71 * before casting it to uint64_t. This is to make it work with different 72 * combination of word size (64 bit and 32 bit) and virtio device 73 * (virtio-pci and virtio-user). 74 */ 75 #define VIRTIO_MBUF_ADDR(mb, vq) \ 76 ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset))) 77 #else 78 #define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova) 79 #endif 80 81 /** 82 * Return the physical address (or virtual address in case of 83 * virtio-user) of mbuf data buffer, taking care of mbuf data offset 84 */ 85 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \ 86 (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off) 87 88 #define VTNET_SQ_RQ_QUEUE_IDX 0 89 #define VTNET_SQ_TQ_QUEUE_IDX 1 90 #define VTNET_SQ_CQ_QUEUE_IDX 2 91 92 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; 93 /** 94 * The maximum virtqueue size is 2^15. Use that value as the end of 95 * descriptor chain terminator since it will never be a valid index 96 * in the descriptor table. This is used to verify we are correctly 97 * handling vq_free_cnt. 98 */ 99 #define VQ_RING_DESC_CHAIN_END 32768 100 101 /** 102 * Control the RX mode, ie. promiscuous, allmulti, etc... 103 * All commands require an "out" sg entry containing a 1 byte 104 * state value, zero = disable, non-zero = enable. Commands 105 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. 106 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. 107 */ 108 #define VIRTIO_NET_CTRL_RX 0 109 #define VIRTIO_NET_CTRL_RX_PROMISC 0 110 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 111 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 112 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 113 #define VIRTIO_NET_CTRL_RX_NOUNI 4 114 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 115 116 /** 117 * Control the MAC 118 * 119 * The MAC filter table is managed by the hypervisor, the guest should 120 * assume the size is infinite. Filtering should be considered 121 * non-perfect, ie. based on hypervisor resources, the guest may 122 * received packets from sources not specified in the filter list. 123 * 124 * In addition to the class/cmd header, the TABLE_SET command requires 125 * two out scatterlists. Each contains a 4 byte count of entries followed 126 * by a concatenated byte stream of the ETH_ALEN MAC addresses. The 127 * first sg list contains unicast addresses, the second is for multicast. 128 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature 129 * is available. 130 * 131 * The ADDR_SET command requests one out scatterlist, it contains a 132 * 6 bytes MAC address. This functionality is present if the 133 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. 134 */ 135 struct virtio_net_ctrl_mac { 136 uint32_t entries; 137 uint8_t macs[][ETHER_ADDR_LEN]; 138 } __attribute__((__packed__)); 139 140 #define VIRTIO_NET_CTRL_MAC 1 141 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 142 #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 143 144 /** 145 * Control VLAN filtering 146 * 147 * The VLAN filter table is controlled via a simple ADD/DEL interface. 148 * VLAN IDs not added may be filtered by the hypervisor. Del is the 149 * opposite of add. Both commands expect an out entry containing a 2 150 * byte VLAN ID. VLAN filtering is available with the 151 * VIRTIO_NET_F_CTRL_VLAN feature bit. 152 */ 153 #define VIRTIO_NET_CTRL_VLAN 2 154 #define VIRTIO_NET_CTRL_VLAN_ADD 0 155 #define VIRTIO_NET_CTRL_VLAN_DEL 1 156 157 /* 158 * Control link announce acknowledgement 159 * 160 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that 161 * driver has recevied the notification; device would clear the 162 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives 163 * this command. 164 */ 165 #define VIRTIO_NET_CTRL_ANNOUNCE 3 166 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 167 168 struct virtio_net_ctrl_hdr { 169 uint8_t class; 170 uint8_t cmd; 171 } __attribute__((packed)); 172 173 typedef uint8_t virtio_net_ctrl_ack; 174 175 #define VIRTIO_NET_OK 0 176 #define VIRTIO_NET_ERR 1 177 178 #define VIRTIO_MAX_CTRL_DATA 2048 179 180 struct virtio_pmd_ctrl { 181 struct virtio_net_ctrl_hdr hdr; 182 virtio_net_ctrl_ack status; 183 uint8_t data[VIRTIO_MAX_CTRL_DATA]; 184 }; 185 186 struct vq_desc_extra { 187 void *cookie; 188 uint16_t ndescs; 189 uint16_t next; 190 }; 191 192 struct virtqueue { 193 struct virtio_hw *hw; /**< virtio_hw structure pointer. */ 194 struct vring vq_ring; /**< vring keeping desc, used and avail */ 195 struct vring_packed ring_packed; /**< vring keeping descs */ 196 bool avail_wrap_counter; 197 bool used_wrap_counter; 198 uint16_t event_flags_shadow; 199 uint16_t avail_used_flags; 200 /** 201 * Last consumed descriptor in the used table, 202 * trails vq_ring.used->idx. 203 */ 204 uint16_t vq_used_cons_idx; 205 uint16_t vq_nentries; /**< vring desc numbers */ 206 uint16_t vq_free_cnt; /**< num of desc available */ 207 uint16_t vq_avail_idx; /**< sync until needed */ 208 uint16_t vq_free_thresh; /**< free threshold */ 209 210 void *vq_ring_virt_mem; /**< linear address of vring*/ 211 unsigned int vq_ring_size; 212 213 union { 214 struct virtnet_rx rxq; 215 struct virtnet_tx txq; 216 struct virtnet_ctl cq; 217 }; 218 219 rte_iova_t vq_ring_mem; /**< physical address of vring, 220 * or virtual address for virtio_user. */ 221 222 /** 223 * Head of the free chain in the descriptor table. If 224 * there are no free descriptors, this will be set to 225 * VQ_RING_DESC_CHAIN_END. 226 */ 227 uint16_t vq_desc_head_idx; 228 uint16_t vq_desc_tail_idx; 229 uint16_t vq_queue_index; /**< PCI queue index */ 230 uint16_t offset; /**< relative offset to obtain addr in mbuf */ 231 uint16_t *notify_addr; 232 struct rte_mbuf **sw_ring; /**< RX software ring. */ 233 struct vq_desc_extra vq_descx[0]; 234 }; 235 236 /* If multiqueue is provided by host, then we suppport it. */ 237 #define VIRTIO_NET_CTRL_MQ 4 238 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 239 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 240 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 241 242 /** 243 * This is the first element of the scatter-gather list. If you don't 244 * specify GSO or CSUM features, you can simply ignore the header. 245 */ 246 struct virtio_net_hdr { 247 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ 248 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /**< Checksum is valid */ 249 uint8_t flags; 250 #define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ 251 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ 252 #define VIRTIO_NET_HDR_GSO_UDP 3 /**< GSO frame, IPv4 UDP (UFO) */ 253 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /**< GSO frame, IPv6 TCP */ 254 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /**< TCP has ECN set */ 255 uint8_t gso_type; 256 uint16_t hdr_len; /**< Ethernet + IP + tcp/udp hdrs */ 257 uint16_t gso_size; /**< Bytes to append to hdr_len per frame */ 258 uint16_t csum_start; /**< Position to start checksumming from */ 259 uint16_t csum_offset; /**< Offset after that to place checksum */ 260 }; 261 262 /** 263 * This is the version of the header to use when the MRG_RXBUF 264 * feature has been negotiated. 265 */ 266 struct virtio_net_hdr_mrg_rxbuf { 267 struct virtio_net_hdr hdr; 268 uint16_t num_buffers; /**< Number of merged rx buffers */ 269 }; 270 271 /* Region reserved to allow for transmit header and indirect ring */ 272 #define VIRTIO_MAX_TX_INDIRECT 8 273 struct virtio_tx_region { 274 struct virtio_net_hdr_mrg_rxbuf tx_hdr; 275 union { 276 struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] 277 __attribute__((__aligned__(16))); 278 struct vring_packed_desc tx_indir_pq[VIRTIO_MAX_TX_INDIRECT] 279 __attribute__((__aligned__(16))); 280 }; 281 }; 282 283 static inline int 284 __desc_is_used(struct vring_packed_desc *desc, bool wrap_counter) 285 { 286 uint16_t used, avail, flags; 287 288 flags = desc->flags; 289 used = !!(flags & VRING_DESC_F_USED(1)); 290 avail = !!(flags & VRING_DESC_F_AVAIL(1)); 291 292 return avail == used && used == wrap_counter; 293 } 294 295 static inline int 296 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq) 297 { 298 return __desc_is_used(desc, vq->used_wrap_counter); 299 } 300 301 302 static inline void 303 vring_desc_init_packed(struct virtqueue *vq, int n) 304 { 305 int i; 306 for (i = 0; i < n - 1; i++) { 307 vq->ring_packed.desc_packed[i].id = i; 308 vq->vq_descx[i].next = i + 1; 309 } 310 vq->ring_packed.desc_packed[i].id = i; 311 vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END; 312 } 313 314 /* Chain all the descriptors in the ring with an END */ 315 static inline void 316 vring_desc_init_split(struct vring_desc *dp, uint16_t n) 317 { 318 uint16_t i; 319 320 for (i = 0; i < n - 1; i++) 321 dp[i].next = (uint16_t)(i + 1); 322 dp[i].next = VQ_RING_DESC_CHAIN_END; 323 } 324 325 /** 326 * Tell the backend not to interrupt us. 327 */ 328 static inline void 329 virtqueue_disable_intr_packed(struct virtqueue *vq) 330 { 331 uint16_t *event_flags = &vq->ring_packed.driver_event->desc_event_flags; 332 333 *event_flags = RING_EVENT_FLAGS_DISABLE; 334 } 335 336 337 /** 338 * Tell the backend not to interrupt us. 339 */ 340 static inline void 341 virtqueue_disable_intr(struct virtqueue *vq) 342 { 343 if (vtpci_packed_queue(vq->hw)) 344 virtqueue_disable_intr_packed(vq); 345 else 346 vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 347 } 348 349 /** 350 * Tell the backend to interrupt. Implementation for packed virtqueues. 351 */ 352 static inline void 353 virtqueue_enable_intr_packed(struct virtqueue *vq) 354 { 355 uint16_t *event_flags = &vq->ring_packed.driver_event->desc_event_flags; 356 357 358 if (vq->event_flags_shadow == RING_EVENT_FLAGS_DISABLE) { 359 virtio_wmb(vq->hw->weak_barriers); 360 vq->event_flags_shadow = RING_EVENT_FLAGS_ENABLE; 361 *event_flags = vq->event_flags_shadow; 362 } 363 } 364 365 /** 366 * Tell the backend to interrupt. Implementation for split virtqueues. 367 */ 368 static inline void 369 virtqueue_enable_intr_split(struct virtqueue *vq) 370 { 371 vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); 372 } 373 374 /** 375 * Tell the backend to interrupt us. 376 */ 377 static inline void 378 virtqueue_enable_intr(struct virtqueue *vq) 379 { 380 if (vtpci_packed_queue(vq->hw)) 381 virtqueue_enable_intr_packed(vq); 382 else 383 virtqueue_enable_intr_split(vq); 384 } 385 386 /** 387 * Dump virtqueue internal structures, for debug purpose only. 388 */ 389 void virtqueue_dump(struct virtqueue *vq); 390 /** 391 * Get all mbufs to be freed. 392 */ 393 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq); 394 395 /* Flush the elements in the used ring. */ 396 void virtqueue_rxvq_flush(struct virtqueue *vq); 397 398 static inline int 399 virtqueue_full(const struct virtqueue *vq) 400 { 401 return vq->vq_free_cnt == 0; 402 } 403 404 static inline int 405 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx) 406 { 407 if (vtpci_queue_idx == hw->max_queue_pairs * 2) 408 return VTNET_CQ; 409 else if (vtpci_queue_idx % 2 == 0) 410 return VTNET_RQ; 411 else 412 return VTNET_TQ; 413 } 414 415 #define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx)) 416 417 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx); 418 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx); 419 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, 420 uint16_t num); 421 422 static inline void 423 vq_update_avail_idx(struct virtqueue *vq) 424 { 425 virtio_wmb(vq->hw->weak_barriers); 426 vq->vq_ring.avail->idx = vq->vq_avail_idx; 427 } 428 429 static inline void 430 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) 431 { 432 uint16_t avail_idx; 433 /* 434 * Place the head of the descriptor chain into the next slot and make 435 * it usable to the host. The chain is made available now rather than 436 * deferring to virtqueue_notify() in the hopes that if the host is 437 * currently running on another CPU, we can keep it processing the new 438 * descriptor. 439 */ 440 avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); 441 if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx)) 442 vq->vq_ring.avail->ring[avail_idx] = desc_idx; 443 vq->vq_avail_idx++; 444 } 445 446 static inline int 447 virtqueue_kick_prepare(struct virtqueue *vq) 448 { 449 /* 450 * Ensure updated avail->idx is visible to vhost before reading 451 * the used->flags. 452 */ 453 virtio_mb(vq->hw->weak_barriers); 454 return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY); 455 } 456 457 static inline int 458 virtqueue_kick_prepare_packed(struct virtqueue *vq) 459 { 460 uint16_t flags; 461 462 /* 463 * Ensure updated data is visible to vhost before reading the flags. 464 */ 465 virtio_mb(vq->hw->weak_barriers); 466 flags = vq->ring_packed.device_event->desc_event_flags; 467 468 return flags != RING_EVENT_FLAGS_DISABLE; 469 } 470 471 /* 472 * virtqueue_kick_prepare*() or the virtio_wmb() should be called 473 * before this function to be sure that all the data is visible to vhost. 474 */ 475 static inline void 476 virtqueue_notify(struct virtqueue *vq) 477 { 478 VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); 479 } 480 481 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP 482 #define VIRTQUEUE_DUMP(vq) do { \ 483 uint16_t used_idx, nused; \ 484 used_idx = (vq)->vq_ring.used->idx; \ 485 nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ 486 if (vtpci_packed_queue((vq)->hw)) { \ 487 PMD_INIT_LOG(DEBUG, \ 488 "VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \ 489 "VQ: - avail_wrap_counter=%d; used_wrap_counter=%d", \ 490 (vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \ 491 (vq)->vq_avail_idx, (vq)->avail_wrap_counter, \ 492 (vq)->used_wrap_counter); \ 493 break; \ 494 } \ 495 PMD_INIT_LOG(DEBUG, \ 496 "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ 497 " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ 498 " avail.flags=0x%x; used.flags=0x%x", \ 499 (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ 500 (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ 501 (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ 502 (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ 503 } while (0) 504 #else 505 #define VIRTQUEUE_DUMP(vq) do { } while (0) 506 #endif 507 508 #endif /* _VIRTQUEUE_H_ */ 509