/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef SPDK_VIRTIO_H
#define SPDK_VIRTIO_H

#include "spdk/stdinc.h"

#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_config.h>

#include "spdk/log.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/json.h"
#include "spdk/thread.h"
#include "spdk/pci_ids.h"
#include "spdk/env.h"

/**
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100

/* Extra status define for readability */
#define VIRTIO_CONFIG_S_RESET 0

struct virtio_dev_ops;

struct virtio_dev {
        struct virtqueue **vqs;

        /** Name of this virtio dev set by backend */
        char *name;

        /** Fixed number of backend-specific non-I/O virtqueues. */
        uint16_t fixed_queues_num;

        /** Max number of virtqueues the host supports. */
        uint16_t max_queues;

        /** Common device & guest features. */
        uint64_t negotiated_features;

        int is_hw;

        /** Modern/legacy virtio device flag. */
        uint8_t modern;

        /** Mutex for asynchronous virtqueue-changing operations. */
        pthread_mutex_t mutex;

        /** Backend-specific callbacks. */
        const struct virtio_dev_ops *backend_ops;

        /** Context for the backend ops */
        void *ctx;
};

struct virtio_dev_ops {
        int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
                            void *dst, int len);
        int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
                             const void *src, int len);
        uint8_t (*get_status)(struct virtio_dev *hw);
        void (*set_status)(struct virtio_dev *hw, uint8_t status);

        /**
         * Get device features. The features might be already
         * negotiated with driver (guest) features.
         */
        uint64_t (*get_features)(struct virtio_dev *vdev);

        /**
         * Negotiate and set device features.
         * The negotiation can fail with return code -1.
         * This function should also set vdev->negotiated_features field.
         */
        int (*set_features)(struct virtio_dev *vdev, uint64_t features);

        /** Destruct virtio device */
        void (*destruct_dev)(struct virtio_dev *vdev);

        uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
        int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
        void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
        void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);

        void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
        void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
};
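/*
 * Illustrative sketch (not part of the API): a backend typically provides
 * its callbacks as a constant ops table and passes it, together with a
 * backend-specific context, to virtio_dev_construct() declared below.
 * All example_* names here are hypothetical.
 *
 *      static uint8_t
 *      example_get_status(struct virtio_dev *vdev)
 *      {
 *              struct example_backend *b = vdev->ctx;
 *
 *              return b->cached_status;
 *      }
 *
 *      static const struct virtio_dev_ops example_ops = {
 *              .get_status = example_get_status,
 *              // ...remaining callbacks filled in analogously...
 *      };
 */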
struct vq_desc_extra {
        void *cookie;
        uint16_t ndescs;
};

struct virtqueue {
        struct virtio_dev *vdev; /**< owner of this virtqueue */
        struct vring vq_ring;    /**< vring keeping desc, used and avail */
        /**
         * Last consumed descriptor in the used table,
         * trails vq_ring.used->idx.
         */
        uint16_t vq_used_cons_idx;
        uint16_t vq_nentries;  /**< vring desc numbers */
        uint16_t vq_free_cnt;  /**< num of desc available */
        uint16_t vq_avail_idx; /**< sync until needed */

        void *vq_ring_virt_mem; /**< virtual address of vring */
        unsigned int vq_ring_size;

        uint64_t vq_ring_mem; /**< physical address of vring */

        /**
         * Head of the free chain in the descriptor table. If
         * there are no free descriptors, this will be set to
         * VQ_RING_DESC_CHAIN_END.
         */
        uint16_t vq_desc_head_idx;

        /**
         * Tail of the free chain in desc table. If
         * there are no free descriptors, this will be set to
         * VQ_RING_DESC_CHAIN_END.
         */
        uint16_t vq_desc_tail_idx;
        uint16_t vq_queue_index; /**< PCI queue index */
        uint16_t *notify_addr;

        /** Thread that's polling this queue. */
        struct spdk_thread *owner_thread;

        uint16_t req_start;
        uint16_t req_end;
        uint16_t reqs_finished;

        struct vq_desc_extra vq_descx[0];
};

enum spdk_virtio_desc_type {
        SPDK_VIRTIO_DESC_RO = 0,                  /**< Read only */
        SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
        /* TODO VIRTIO_DESC_INDIRECT */
};

/** Context for creating PCI virtio_devs */
struct virtio_pci_ctx;

/**
 * Callback for creating virtio_dev from a PCI device.
 *
 * \param pci_ctx PCI context to be associated with a virtio_dev
 * \param ctx context provided by the user
 * \return 0 on success, -1 on error.
 */
typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);
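/*
 * Illustrative sketch: a typical virtio_pci_create_cb implementation
 * initializes a preallocated virtio_dev from the provided PCI context
 * via virtio_pci_dev_init(), declared at the bottom of this file.
 * The example_* names are hypothetical.
 *
 *      static int
 *      example_pci_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
 *      {
 *              struct virtio_dev *vdev = ctx;
 *
 *              // returning non-zero here would free the PCI context
 *              return virtio_pci_dev_init(vdev, "VirtioExample0", pci_ctx);
 *      }
 */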
uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);

/**
 * Start a new request on the current vring head position and associate it
 * with an opaque cookie object. The previous request in the given vq will be
 * made visible to the device in hopes it can be processed early, but there's
 * no guarantee it will be until the device is notified with \c
 * virtqueue_req_flush. This behavior is simply an optimization and virtqueues
 * must always be flushed. Empty requests (with no descriptors added) will be
 * ignored. The device owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 * \param cookie opaque object to associate with this request. Once the request
 * is sent, processed and a response is received, the same object will be
 * returned to the user after calling the virtio poll API.
 * \param iovcnt number of required iovectors for the request. This can be
 * higher than the actual number of iovectors to be added.
 * \return 0 on success or negative errno otherwise. If `iovcnt` is
 * greater than the virtqueue depth, -EINVAL is returned. If simply not enough
 * iovectors are available, -ENOMEM is returned.
 */
int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);

/**
 * Flush a virtqueue. This will notify the device if it's required.
 * The device owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_flush(struct virtqueue *vq);

/**
 * Abort the very last request in a virtqueue. This will restore virtqueue
 * state to the point before the last request was created. Note that this
 * is only effective if the queue hasn't been flushed yet. The device owning
 * the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_abort(struct virtqueue *vq);

/**
 * Add an iovec chain to the last created request. This call does not provide
 * any error checking. The caller has to ensure that no more iovs are added
 * than were specified during request creation. The device owning the given
 * virtqueue must be started.
 *
 * \param vq virtio queue
 * \param iovs iovec array
 * \param iovcnt number of iovs in iovec array
 * \param desc_type type of all given iovectors
 */
void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
                            enum spdk_virtio_desc_type desc_type);

/**
 * Construct a virtio device. The device will be in the stopped state by
 * default. Before doing any I/O, it has to be manually started; see
 * \c virtio_dev_reset and \c virtio_dev_start.
 *
 * \param vdev memory for the virtio device, must be zeroed
 * \param name name for the virtio device
 * \param ops backend callbacks
 * \param ops_ctx argument for the backend callbacks
 * \return zero on success, or negative error code otherwise
 */
int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
                         const struct virtio_dev_ops *ops, void *ops_ctx);

/**
 * Reset the device and prepare it to be `virtio_dev_start`ed. This call
 * will also renegotiate feature flags.
 *
 * \param vdev virtio device
 * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
 * flag will be automatically appended, as legacy devices are not supported.
 */
int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);
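/*
 * Illustrative sketch of the request API above: enqueue a single request
 * with one read-only and one writable iovec, then poll for its completion.
 * Assumes the device is started, the queue is bound to the calling thread,
 * and iovs/my_cookie are set up by the caller; error handling is omitted.
 *
 *      struct iovec iovs[2];
 *      void *io[1];
 *      uint32_t io_len[1];
 *
 *      if (virtqueue_req_start(vq, my_cookie, 2) != 0) {
 *              return; // no free descriptors; flush and retry later
 *      }
 *      virtqueue_req_add_iovs(vq, &iovs[0], 1, SPDK_VIRTIO_DESC_RO);
 *      virtqueue_req_add_iovs(vq, &iovs[1], 1, SPDK_VIRTIO_DESC_WR);
 *      virtqueue_req_flush(vq);
 *
 *      // poll until the device hands the cookie back
 *      while (virtio_recv_pkts(vq, io, io_len, 1) == 0) {
 *              ;
 *      }
 *      assert(io[0] == my_cookie);
 */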
/**
 * Notify the host to start processing this virtio device. This is
 * a blocking call that won't return until the host has started.
 * This will also allocate virtqueues.
 *
 * \param vdev virtio device
 * \param max_queues number of queues to allocate. The max number of
 * usable I/O queues is also limited by the host device. `vdev` will be
 * started successfully even if the host supports fewer queues than requested.
 * \param fixed_queue_num number of queues preceding the first
 * request queue. For Virtio-SCSI this is equal to 2, as there are
 * additional event and control queues.
 */
int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
                     uint16_t fixed_queues_num);

/**
 * Stop the host from processing the device. This is a blocking call
 * that won't return until all outstanding I/O has been processed on
 * the host (virtio device) side. In order to re-start the device, it
 * has to be reset via `virtio_dev_reset` first.
 *
 * \param vdev virtio device
 */
void virtio_dev_stop(struct virtio_dev *vdev);

/**
 * Destruct a virtio device. Note that it must be in the stopped state.
 * The virtio_dev should be manually freed afterwards.
 *
 * \param vdev virtio device
 */
void virtio_dev_destruct(struct virtio_dev *vdev);

/**
 * Bind a virtqueue with the given index to the current thread.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index virtqueue index
 * \return 0 on success, -1 in case a virtqueue with the given index either
 * does not exist or is already acquired.
 */
int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);

/**
 * Look for an unused queue and bind it to the current thread. This will
 * scan the queues in the range from *start_index* (inclusive) up to
 * vdev->max_queues (exclusive).
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param start_index virtqueue index to start looking from
 * \return index of the acquired queue or -1 in case no unused queue in the
 * given range has been found
 */
int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);

/**
 * Get the thread that acquired the given virtqueue.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index index of virtqueue
 * \return thread that acquired the given virtqueue. If the queue is unused
 * or doesn't exist, NULL is returned.
 */
struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);

/**
 * Check if the virtqueue with the given index is acquired.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index index of virtqueue
 * \return virtqueue acquire status. In case of an invalid index, *false* is
 * returned.
 */
bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);

/**
 * Release a previously acquired queue.
 *
 * This function must be called from the thread that acquired the queue.
 *
 * \param vdev vhost device
 * \param index index of virtqueue to release
 */
void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);

/**
 * Get Virtio status flags.
 *
 * \param vdev virtio device
 */
uint8_t virtio_dev_get_status(struct virtio_dev *vdev);
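/*
 * Illustrative sketch of a typical device lifecycle using the calls above,
 * reusing the hypothetical example_ops table from the earlier sketch.
 * Error handling is omitted; example_* names are hypothetical.
 *
 *      struct virtio_dev vdev;
 *      int32_t queue_idx;
 *
 *      memset(&vdev, 0, sizeof(vdev)); // construct requires zeroed memory
 *      virtio_dev_construct(&vdev, "VirtioExample0", &example_ops, example_ctx);
 *      virtio_dev_reset(&vdev, example_supported_features);
 *      virtio_dev_start(&vdev, 4, 0);
 *
 *      // bind one of the I/O queues to the calling thread
 *      queue_idx = virtio_dev_find_and_acquire_queue(&vdev, 0);
 *
 *      // ...submit and poll I/O on vdev.vqs[queue_idx]...
 *
 *      virtio_dev_release_queue(&vdev, (uint16_t)queue_idx);
 *      virtio_dev_stop(&vdev);
 *      virtio_dev_destruct(&vdev);
 */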
/**
 * Set a Virtio status flag. The flags have to be set in the very specific
 * order defined in the VIRTIO 1.0 spec, section 3.1.1. To unset the flags,
 * stop the device or set the \c VIRTIO_CONFIG_S_RESET status flag. There is
 * no way to unset only particular flags.
 *
 * \param vdev virtio device
 * \param flag flag to set
 */
void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);

/**
 * Write raw data into the device config at the given offset. This call does
 * not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param src pointer to data to copy from
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);

/**
 * Read raw data from the device config at the given offset. This call does
 * not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param dst pointer to buffer to copy data into
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);

/**
 * Get backend-specific ops for the given device.
 *
 * \param vdev virtio device
 */
const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);

/**
 * Check if the device has negotiated the given feature bit.
 *
 * \param vdev virtio device
 * \param bit feature bit
 */
static inline bool
virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
{
        return !!(vdev->negotiated_features & (1ULL << bit));
}

/**
 * Dump all device-specific information into the given json stream.
 *
 * \param vdev virtio device
 * \param w json stream
 */
void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);

/**
 * Enumerate all PCI Virtio devices of a given type on the system.
 *
 * \param enum_cb a function to be called for each valid PCI device.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `enum_cb`
 * \param pci_device_id PCI Device ID of devices to iterate through
 */
int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
                             uint16_t pci_device_id);

/**
 * Attach a PCI Virtio device of a given type.
 *
 * \param create_cb callback to create a virtio_dev.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `create_cb`
 * \param device_id PCI Device ID of the device to attach
 * \param pci_addr PCI address of the device to attach
 */
int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
                          uint16_t device_id, struct spdk_pci_addr *pci_addr);

/**
 * Connect to a vhost-user device and init the corresponding virtio_dev struct.
 * The virtio_dev will have to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated vhost device struct to operate on
 * \param name name of this virtio device
 * \param path path to the Unix domain socket of the vhost-user device
 * \param queue_size size of each of the queues
 * \return 0 on success, negative error code otherwise
 */
int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
                         uint32_t queue_size);
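/*
 * Illustrative sketch: connecting to a vhost-user target over its Unix
 * domain socket and starting the device. The socket path, device name and
 * example_* names are hypothetical; error handling is omitted.
 *
 *      struct virtio_dev *vdev = calloc(1, sizeof(*vdev));
 *
 *      virtio_user_dev_init(vdev, "VirtioUser0", "/var/tmp/vhost.0", 512);
 *      virtio_dev_reset(vdev, example_supported_features);
 *      virtio_dev_start(vdev, 1, 0);
 */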
/**
 * Initialize virtio_dev for a given PCI device.
 * The virtio_dev has to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated vhost device struct to operate on
 * \param name name of this virtio device
 * \param pci_ctx context of the PCI device
 * \return 0 on success, -1 on error.
 */
int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
                        struct virtio_pci_ctx *pci_ctx);

#endif /* SPDK_VIRTIO_H */