/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation.
 * All rights reserved.
 */

#ifndef SPDK_VIRTIO_H
#define SPDK_VIRTIO_H

#include "spdk/stdinc.h"

#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_config.h>

#include "spdk/log.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/json.h"
#include "spdk/thread.h"
#include "spdk/pci_ids.h"
#include "spdk/env.h"

/**
 * The maximum virtqueue size is 2^15. Use that value as the
 * descriptor chain terminator, since it will never be a valid index
 * in the descriptor table. This is used to verify that we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100

/* Extra status define for readability */
#define VIRTIO_CONFIG_S_RESET 0

struct virtio_dev_ops;

struct virtio_dev {
	struct virtqueue **vqs;

	/** Name of this virtio dev set by backend */
	char *name;

	/** Fixed number of backend-specific non-I/O virtqueues. */
	uint16_t fixed_queues_num;

	/** Max number of virtqueues the host supports. */
	uint16_t max_queues;

	/** Common device & guest features. */
	uint64_t negotiated_features;

	int is_hw;

	/** Modern/legacy virtio device flag. */
	uint8_t modern;

	/** Mutex for asynchronous virtqueue-changing operations. */
	pthread_mutex_t mutex;

	/** Backend-specific callbacks. */
	const struct virtio_dev_ops *backend_ops;

	/** Context for the backend ops */
	void *ctx;
};

struct virtio_dev_ops {
	int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
			    void *dst, int len);
	int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
			     const void *src, int len);
	uint8_t (*get_status)(struct virtio_dev *hw);
	void (*set_status)(struct virtio_dev *hw, uint8_t status);

	/**
	 * Get device features. The returned features may already be
	 * negotiated with the driver (guest) features.
	 */
	uint64_t (*get_features)(struct virtio_dev *vdev);

	/**
	 * Negotiate and set device features.
	 * Negotiation can fail, in which case -1 is returned.
	 * This function should also set the vdev->negotiated_features field.
	 */
	int (*set_features)(struct virtio_dev *vdev, uint64_t features);

	/** Destruct virtio device */
	void (*destruct_dev)(struct virtio_dev *vdev);

	uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
	int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
	void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
	void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);

	void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
	void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
};

struct vq_desc_extra {
	void *cookie;
	uint16_t ndescs;
};

struct virtqueue {
	struct virtio_dev *vdev; /**< owner of this virtqueue */
	struct vring vq_ring;    /**< vring keeping desc, used and avail */
	/**
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t vq_used_cons_idx;
	uint16_t vq_nentries;  /**< vring desc numbers */
	uint16_t vq_free_cnt;  /**< num of desc available */
	uint16_t vq_avail_idx; /**< sync until needed */

	void *vq_ring_virt_mem; /**< virtual address of vring */
	unsigned int vq_ring_size;

	uint64_t vq_ring_mem; /**< physical address of vring */

	/**
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t vq_desc_head_idx;

	/**
	 * Tail of the free chain in desc table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t vq_desc_tail_idx;
	uint16_t vq_queue_index; /**< PCI queue index */
	uint16_t *notify_addr;

	/** Thread that's polling this queue. */
	struct spdk_thread *owner_thread;

	uint16_t req_start;
	uint16_t req_end;
	uint16_t reqs_finished;

	struct vq_desc_extra vq_descx[0];
};

enum spdk_virtio_desc_type {
	SPDK_VIRTIO_DESC_RO = 0,                  /**< Read only */
	SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
	/* TODO VIRTIO_DESC_INDIRECT */
};

/** Context for creating PCI virtio_devs */
struct virtio_pci_ctx;

/**
 * Callback for creating virtio_dev from a PCI device.
 * \param pci_ctx PCI context to be associated with a virtio_dev
 * \param ctx context provided by the user
 * \return 0 on success, -1 on error.
 */
typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);

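/**
 * Poll the virtqueue for completed requests.
 *
 * \param vq virtio queue
 * \param io array to be filled with cookies of the completed requests
 * \param len array to be filled with serviced lengths of the completed requests
 * \param io_cnt maximum number of completions to return
 * \return number of completed requests
 */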
uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);

/**
 * Start a new request on the current vring head position and associate it
 * with an opaque cookie object. The previous request in the given vq will be
 * made visible to the device in hopes it can be processed early, but there's
 * no guarantee it will be until the device is notified with \c
 * virtqueue_req_flush. This behavior is simply an optimization and virtqueues
 * must always be flushed. Empty requests (with no descriptors added) will be
 * ignored. The device owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 * \param cookie opaque object to associate with this request. Once the request
 * is sent, processed and a response is received, the same object will be
 * returned to the user after calling the virtio poll API.
 * \param iovcnt number of required iovectors for the request. This can be
 * higher than the actual number of iovectors to be added.
 * \return 0 on success or negative errno otherwise. If `iovcnt` is
 * greater than the virtqueue depth, -EINVAL is returned. If simply not enough
 * iovectors are available, -ENOMEM is returned.
 */
int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);

/**
 * Flush a virtqueue. This will notify the device if it's required.
 * The device owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_flush(struct virtqueue *vq);

/**
 * Abort the very last request in a virtqueue. This will restore the virtqueue
 * state to the point before the last request was created. Note that this
 * is only effective if the queue hasn't been flushed yet. The device owning
 * the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_abort(struct virtqueue *vq);

/**
 * Add an iovec chain to the last created request. This call does not provide
 * any error checking. The caller must ensure not to add more iovs than
 * were specified during request creation. The device owning the given
 * virtqueue must be started.
 *
 * \param vq virtio queue
 * \param iovs iovec array
 * \param iovcnt number of iovs in the iovec array
 * \param desc_type type of all given iovectors
 */
void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
			    enum spdk_virtio_desc_type desc_type);
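
/*
 * Illustrative sketch of the request flow built from the calls above,
 * assuming a started device and a queue acquired by this thread. `vq`,
 * `cookie`, `buf`, and `buf_len` are hypothetical.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = buf_len };
 *	void *io[1];
 *	uint32_t len[1];
 *
 *	if (virtqueue_req_start(vq, cookie, 1) == 0) {
 *		virtqueue_req_add_iovs(vq, &iov, 1, SPDK_VIRTIO_DESC_RO);
 *		virtqueue_req_flush(vq);
 *	}
 *
 *	// poll for the completion; io[0] will be `cookie` once it arrives
 *	while (virtio_recv_pkts(vq, io, len, 1) == 0) {
 *		;
 *	}
 */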

/**
 * Construct a virtio device. The device will be in stopped state by default.
 * Before doing any I/O, it has to be manually started via \c virtio_dev_reset
 * followed by \c virtio_dev_start.
 *
 * \param vdev memory for virtio device, must be zeroed
 * \param name name for the virtio device
 * \param ops backend callbacks
 * \param ops_ctx argument for the backend callbacks
 * \return zero on success, or negative error code otherwise
 */
int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
			 const struct virtio_dev_ops *ops, void *ops_ctx);

/**
 * Reset the device and prepare it to be `virtio_dev_start`ed. This call
 * will also renegotiate feature flags.
 *
 * \param vdev virtio device
 * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
 * flag will be automatically appended, as legacy devices are not supported.
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);

/**
 * Notify the host to start processing this virtio device. This is
 * a blocking call that won't return until the host has started.
 * This will also allocate virtqueues.
 *
 * \param vdev virtio device
 * \param max_queues number of queues to allocate. The max number of
 * usable I/O queues is also limited by the host device. `vdev` will be
 * started successfully even if the host supports fewer queues than requested.
 * \param fixed_queues_num number of queues preceding the first
 * request queue. For Virtio-SCSI this is equal to 2, as there are
 * additional event and control queues.
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
		     uint16_t fixed_queues_num);
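
/*
 * Illustrative bring-up sequence combining the calls above, assuming a
 * vhost-user backend. The name, socket path, and queue counts are
 * hypothetical; a Virtio-SCSI device uses 2 fixed queues (event + control).
 *
 *	struct virtio_dev vdev = {0};
 *
 *	// 0 = no extra driver features; VIRTIO_F_VERSION_1 is appended automatically
 *	if (virtio_user_dev_init(&vdev, "VirtioScsi0", "/var/tmp/vhost.0", 512) == 0 &&
 *	    virtio_dev_reset(&vdev, 0) == 0 &&
 *	    virtio_dev_start(&vdev, 3, 2) == 0) {
 *		// ready for I/O; see virtio_dev_acquire_queue() below
 *	}
 */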

/**
 * Stop the host from processing the device. This is a blocking call
 * that won't return until all outstanding I/O has been processed on
 * the host (virtio device) side. In order to re-start the device, it
 * has to be reset with `virtio_dev_reset` first.
 *
 * \param vdev virtio device
 */
void virtio_dev_stop(struct virtio_dev *vdev);

/**
 * Destruct a virtio device. Note that it must be in the stopped state.
 * The virtio_dev should be manually freed afterwards.
 *
 * \param vdev virtio device
 */
void virtio_dev_destruct(struct virtio_dev *vdev);

/**
 * Bind a virtqueue with the given index to the current thread.
 *
 * This function is thread-safe.
 *
 * \param vdev virtio device
 * \param index virtqueue index
 * \return 0 on success, -1 in case a virtqueue with the given index either
 * does not exist or is already acquired.
 */
int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);

/**
 * Look for an unused queue and bind it to the current thread. This will
 * scan the queues in the range from *start_index* (inclusive) up to
 * vdev->max_queues (exclusive).
 *
 * This function is thread-safe.
 *
 * \param vdev virtio device
 * \param start_index virtqueue index to start looking from
 * \return index of the acquired queue, or -1 in case no unused queue in the
 * given range has been found
 */
int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);

/**
 * Get the thread that acquired the given virtqueue.
 *
 * This function is thread-safe.
 *
 * \param vdev virtio device
 * \param index index of virtqueue
 * \return thread that acquired the given virtqueue. If the queue is unused
 * or doesn't exist, NULL is returned.
 */
struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);

/**
 * Check if the virtqueue with the given index is acquired.
 *
 * This function is thread-safe.
 *
 * \param vdev virtio device
 * \param index index of virtqueue
 * \return virtqueue acquire status. In case of an invalid index, *false* is returned.
 */
bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);

/**
 * Release a previously acquired queue.
 *
 * This function must be called from the thread that acquired the queue.
 *
 * \param vdev virtio device
 * \param index index of virtqueue to release
 */
void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);
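
/*
 * Illustrative sketch: grab the first unused I/O queue past the fixed
 * (non-I/O) queues, use it from this thread, then release it. `vdev` is
 * a hypothetical started device.
 *
 *	int32_t qid = virtio_dev_find_and_acquire_queue(&vdev, vdev.fixed_queues_num);
 *
 *	if (qid >= 0) {
 *		// this thread now owns vdev.vqs[qid] for polling
 *		virtio_dev_release_queue(&vdev, (uint16_t)qid);
 *	}
 */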

/**
 * Get Virtio status flags.
 *
 * \param vdev virtio device
 * \return Virtio status flags
 */
uint8_t virtio_dev_get_status(struct virtio_dev *vdev);

/**
 * Set a Virtio status flag. The flags have to be set in the specific order
 * defined in the VIRTIO 1.0 spec, section 3.1.1. To unset the flags, stop the
 * device or set the \c VIRTIO_CONFIG_S_RESET status flag. There is no way to
 * unset only particular flags.
 *
 * \param vdev virtio device
 * \param flag flag to set
 */
void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);

/**
 * Write raw data into the device config at the given offset. This call does
 * not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param src pointer to data to copy from
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);

/**
 * Read raw data from the device config at the given offset. This call does
 * not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param dst pointer to buffer to copy data into
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);
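
/*
 * Illustrative sketch of reading a single config field, assuming a
 * Virtio-SCSI device, whose config space starts with a 32-bit num_queues
 * field (struct virtio_scsi_config in linux/virtio_scsi.h).
 *
 *	uint32_t num_queues;
 *
 *	if (virtio_dev_read_dev_config(&vdev, 0, &num_queues, sizeof(num_queues)) != 0) {
 *		// handle config read failure
 *	}
 */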

/**
 * Get backend-specific ops for the given device.
 *
 * \param vdev virtio device
 * \return backend ops of the device
 */
const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);

/**
 * Check if the device has negotiated the given feature bit.
 *
 * \param vdev virtio device
 * \param bit feature bit
 * \return true if the feature has been negotiated, false otherwise
 */
static inline bool
virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
{
	return !!(vdev->negotiated_features & (1ULL << bit));
}
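
/*
 * Illustrative usage, assuming a Virtio-SCSI device: check for hotplug
 * support (VIRTIO_SCSI_F_HOTPLUG from linux/virtio_scsi.h) before relying
 * on the event queue.
 *
 *	if (virtio_dev_has_feature(&vdev, VIRTIO_SCSI_F_HOTPLUG)) {
 *		// device reports hotplug events on the event queue
 *	}
 */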

/**
 * Dump all device-specific information into the given json stream.
 *
 * \param vdev virtio device
 * \param w json stream
 */
void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);

/**
 * Enumerate all PCI Virtio devices of the given type on the system.
 *
 * \param enum_cb a function to be called for each valid PCI device.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `enum_cb`
 * \param pci_device_id PCI Device ID of devices to iterate through
 */
int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
			     uint16_t pci_device_id);

/**
 * Attach a PCI Virtio device of the given type.
 *
 * \param create_cb callback to create a virtio_dev.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `create_cb`
 * \param device_id PCI Device ID of the device to attach
 * \param pci_addr PCI address of the device to attach
 */
int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
			  uint16_t device_id, struct spdk_pci_addr *pci_addr);

/**
 * Connect to a vhost-user device and init the corresponding virtio_dev struct.
 * The virtio_dev will have to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated virtio device struct to operate on
 * \param name name of this virtio device
 * \param path path to the Unix domain socket of the vhost-user device
 * \param queue_size size of each of the queues
 * \return 0 on success, negative errno otherwise
 */
int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
			 uint32_t queue_size);

/**
 * Connect to a vfio-user device and init the corresponding virtio_dev struct.
 * The virtio_dev will have to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated virtio device struct to operate on
 * \param name name of this virtio device
 * \param path path to the Unix domain socket of the vfio-user device
 * \return 0 on success, negative errno otherwise
 */
int virtio_vfio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path);

/**
 * Initialize virtio_dev for a given PCI device.
 * The virtio_dev has to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated virtio device struct to operate on
 * \param name name of this virtio device
 * \param pci_ctx context of the PCI device
 * \return 0 on success, -1 on error.
 */
int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
			struct virtio_pci_ctx *pci_ctx);

/**
 * Process a uevent received from the kernel. The uevent describes a hot-add
 * or hot-remove action for a physical device.
 *
 * \param fd file descriptor of the kobject netlink socket
 * \param device_id virtio device ID used to represent virtio-blk or another device
 * \return the name of the virtio device on success; NULL means the uevent
 * is not relevant to this device type.
 */
const char *virtio_pci_dev_event_process(int fd, uint16_t device_id);

#endif /* SPDK_VIRTIO_H */