xref: /spdk/lib/vhost/vhost_internal.h (revision 5fd9561f54daa8eff7f3bcb56c789655bca846b1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_VHOST_INTERNAL_H
35 #define SPDK_VHOST_INTERNAL_H
36 #include <linux/virtio_config.h>
37 
38 #include "spdk/stdinc.h"
39 
40 #include <rte_vhost.h>
41 
42 #include "spdk_internal/vhost_user.h"
43 #include "spdk/bdev.h"
44 #include "spdk/log.h"
45 #include "spdk/util.h"
46 #include "spdk/rpc.h"
47 #include "spdk/config.h"
48 
49 #define SPDK_VHOST_MAX_VQUEUES	256
50 #define SPDK_VHOST_MAX_VQ_SIZE	1024
51 
52 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8
53 
54 #define SPDK_VHOST_IOVS_MAX 129
55 
56 #define SPDK_VHOST_VQ_MAX_SUBMISSIONS	32
57 
58 /*
59  * Rate at which stats are checked for interrupt coalescing.
60  */
61 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10
62 /*
63  * Default threshold at which interrupts start to be coalesced.
64  */
65 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000
66 
67 /*
68  * Currently coalescing is not used by default.
69  * Setting this to value > 0 here or by RPC will enable coalescing.
70  */
71 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0
72 
73 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
74 	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
75 	(1ULL << VIRTIO_F_VERSION_1) | \
76 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
77 	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
78 	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
79 	(1ULL << VIRTIO_F_RING_PACKED) | \
80 	(1ULL << VIRTIO_F_ANY_LAYOUT))
81 
82 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
83 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
84 
85 #define VRING_DESC_F_AVAIL	(1ULL << VRING_PACKED_DESC_F_AVAIL)
86 #define VRING_DESC_F_USED	(1ULL << VRING_PACKED_DESC_F_USED)
87 #define VRING_DESC_F_AVAIL_USED	(VRING_DESC_F_AVAIL | VRING_DESC_F_USED)
88 
89 typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc;
90 typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info;
91 typedef struct rte_vhost_inflight_desc_packed	spdk_vhost_inflight_desc;
92 
93 struct spdk_vhost_virtqueue {
94 	struct rte_vhost_vring vring;
95 	struct rte_vhost_ring_inflight vring_inflight;
96 	uint16_t last_avail_idx;
97 	uint16_t last_used_idx;
98 
99 	struct {
100 		/* To mark a descriptor as available in packed ring
101 		 * Equal to avail_wrap_counter in spec.
102 		 */
103 		uint8_t avail_phase	: 1;
104 		/* To mark a descriptor as used in packed ring
105 		 * Equal to used_wrap_counter in spec.
106 		 */
107 		uint8_t used_phase	: 1;
108 		uint8_t padding		: 5;
109 		bool packed_ring	: 1;
110 	} packed;
111 
112 	void *tasks;
113 
114 	/* Request count from last stats check */
115 	uint32_t req_cnt;
116 
117 	/* Request count from last event */
118 	uint16_t used_req_cnt;
119 
120 	/* How long interrupt is delayed */
121 	uint32_t irq_delay_time;
122 
123 	/* Next time when we need to send event */
124 	uint64_t next_event_time;
125 
126 	/* Associated vhost_virtqueue in the virtio device's virtqueue list */
127 	uint32_t vring_idx;
128 
129 	struct spdk_vhost_session *vsession;
130 
131 	struct spdk_interrupt *intr;
132 } __attribute((aligned(SPDK_CACHE_LINE_SIZE)));
133 
/* State of a single vhost-user connection (one guest/driver attachment). */
struct spdk_vhost_session {
	/* Owning vhost device (back-pointer). */
	struct spdk_vhost_dev *vdev;

	/* rte_vhost connection ID. */
	int vid;

	/* Unique session ID. */
	uint64_t id;
	/* Unique session name. */
	char *name;

	/* Session lifecycle/polling flags; driven by the vhost-user
	 * transport code outside this header.
	 */
	bool initialized;
	bool started;
	bool needs_restart;
	bool forced_polling;
	bool interrupt_mode;

	/* Guest memory table provided by rte_vhost for this connection. */
	struct rte_vhost_memory *mem;

	/* Number of tasks currently outstanding for this session. */
	int task_cnt;

	/* Number of usable virtqueues (<= SPDK_VHOST_MAX_VQUEUES). */
	uint16_t max_queues;

	/* Feature bits negotiated with the guest driver
	 * (queried via vhost_dev_has_feature()).
	 */
	uint64_t negotiated_features;

	/* Local copy of device coalescing settings. */
	uint32_t coalescing_delay_time_base;
	uint32_t coalescing_io_rate_threshold;

	/* Next time when stats for event coalescing will be checked. */
	uint64_t next_stats_check_time;

	/* Interval used for event coalescing checking. */
	uint64_t stats_check_interval;

	/* Session's stop poller will only try limited times to destroy the session. */
	uint32_t stop_retry_count;

	struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES];

	/* Entry in spdk_vhost_user_dev.vsessions. */
	TAILQ_ENTRY(spdk_vhost_session) tailq;
};
176 
/* vhost-user transport-specific state attached to a vhost device. */
struct spdk_vhost_user_dev {
	/* Generic vhost device this transport state belongs to. */
	struct spdk_vhost_dev *vdev;

	/* Transport-level callbacks (session start/stop, ctx size). */
	const struct spdk_vhost_user_dev_backend *user_backend;

	/* Saved original values used to setup coalescing to avoid integer
	 * rounding issues during save/load config.
	 */
	uint32_t coalescing_delay_us;
	uint32_t coalescing_iops_threshold;

	/* Current connections to the device */
	TAILQ_HEAD(, spdk_vhost_session) vsessions;

	/* Increment-only session counter */
	uint64_t vsessions_num;

	/* Number of started and actively polled sessions */
	uint32_t active_session_num;

	/* Number of pending asynchronous operations */
	uint32_t pending_async_op_num;
};
200 
/* A vhost controller, independent of any particular client connection. */
struct spdk_vhost_dev {
	/* Controller name. */
	char *name;
	/* UNIX domain socket path (see vhost_register_unix_socket()). */
	char *path;

	/* Thread on which this device is managed/polled. */
	struct spdk_thread *thread;
	/* True while the device is registered with the transport. */
	bool registered;

	/* Feature masks advertised to / hidden from the client. */
	uint64_t virtio_features;
	uint64_t disabled_features;
	uint64_t protocol_features;
	/* Enable recovery of in-flight packed-ring requests on reconnect. */
	bool packed_ring_recovery;

	/* Device-type callbacks (blk/scsi), see spdk_vhost_dev_backend. */
	const struct spdk_vhost_dev_backend *backend;

	/* Context passed from transport */
	void *ctxt;

	/* Entry in the global device list. */
	TAILQ_ENTRY(spdk_vhost_dev) tailq;
};
220 
221 /**
222  * \param vdev vhost device.
223  * \param vsession vhost session.
224  * \param arg user-provided parameter.
225  *
226  * \return negative values will break the foreach call, meaning
227  * the function won't be called again. Return codes zero and
228  * positive don't have any effect.
229  */
230 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev,
231 				     struct spdk_vhost_session *vsession,
232 				     void *arg);
233 
234 /**
235  * \param vdev vhost device.
236  * \param arg user-provided parameter.
237  */
238 typedef void (*spdk_vhost_dev_fn)(struct spdk_vhost_dev *vdev, void *arg);
239 
/* vhost-user transport callbacks, implemented per device type. */
struct spdk_vhost_user_dev_backend {
	/**
	 * Size of additional per-session context data
	 * allocated whenever a new client connects.
	 */
	size_t session_ctx_size;

	/* Start polling a newly connected session (start/stop session
	 * callbacks; see vhost_user_session_send_event()).
	 */
	int (*start_session)(struct spdk_vhost_session *vsession);
	/* Stop polling a session before it is torn down. */
	int (*stop_session)(struct spdk_vhost_session *vsession);
};
250 
/* Device-type (e.g. blk/scsi) callbacks for a vhost device. */
struct spdk_vhost_dev_backend {
	/* Read up to *len* bytes of the virtio device config space into *config*. */
	int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len);
	/* Write *size* bytes at *offset* of the virtio device config space. */
	int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config,
				uint32_t offset, uint32_t size, uint32_t flags);

	/* Dump device-specific runtime info as JSON. */
	void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
	/* Write the JSON configuration needed to recreate this device. */
	void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
	/* Remove (delete) the device. */
	int (*remove_device)(struct spdk_vhost_dev *vdev);
};
260 
261 void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len);
262 
263 uint16_t vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs,
264 				 uint16_t reqs_len);
265 
266 /**
267  * Get a virtio split descriptor at given index in given virtqueue.
268  * The descriptor will provide access to the entire descriptor
269  * chain. The subsequent descriptors are accessible via
270  * \c spdk_vhost_vring_desc_get_next.
271  * \param vsession vhost session
272  * \param vq virtqueue
273  * \param req_idx descriptor index
274  * \param desc pointer to be set to the descriptor
275  * \param desc_table descriptor table to be used with
276  * \c spdk_vhost_vring_desc_get_next. This might be either
277  * default virtqueue descriptor table or per-chain indirect
278  * table.
279  * \param desc_table_size size of the *desc_table*
280  * \return 0 on success, -1 if given index is invalid.
281  * If -1 is returned, the content of params is undefined.
282  */
283 int vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq,
284 		      uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
285 		      uint32_t *desc_table_size);
286 
287 /**
288  * Get a virtio packed descriptor at given index in given virtqueue.
289  * The descriptor will provide access to the entire descriptor
290  * chain. The subsequent descriptors are accessible via
291  * \c vhost_vring_packed_desc_get_next.
292  * \param vsession vhost session
293  * \param vq virtqueue
294  * \param req_idx descriptor index
295  * \param desc pointer to be set to the descriptor
296  * \param desc_table descriptor table to be used with
297  * \c spdk_vhost_vring_desc_get_next. This might be either
298  * \c NULL or per-chain indirect table.
299  * \param desc_table_size size of the *desc_table*
300  * \return 0 on success, -1 if given index is invalid.
301  * If -1 is returned, the content of params is undefined.
302  */
303 int vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
304 			     struct spdk_vhost_virtqueue *virtqueue,
305 			     uint16_t req_idx, struct vring_packed_desc **desc,
306 			     struct vring_packed_desc **desc_table, uint32_t *desc_table_size);
307 
308 int vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
309 				  spdk_vhost_inflight_desc *desc_array,
310 				  uint16_t req_idx, spdk_vhost_inflight_desc **desc,
311 				  struct vring_packed_desc  **desc_table, uint32_t *desc_table_size);
312 
313 /**
314  * Send IRQ/call client (if pending) for \c vq.
315  * \param vsession vhost session
316  * \param vq virtqueue
317  * \return
318  *   0 - if no interrupt was signalled
319  *   1 - if interrupt was signalled
320  */
321 int vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq);
322 
323 
/**
 * Send IRQs for all queues that need to be signaled.
 * \param vsession vhost session
 */
329 void vhost_session_used_signal(struct spdk_vhost_session *vsession);
330 
331 /**
332  * Send IRQs for the queue that need to be signaled.
333  * \param vq virtqueue
334  */
335 void vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue);
336 
337 void vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
338 				struct spdk_vhost_virtqueue *vq,
339 				uint16_t id, uint32_t len);
340 
/**
 * Enqueue an entry to the packed used ring when the device completes a request.
 * \param vsession vhost session
 * \param virtqueue virtqueue
 * \param num_descs descriptor count. It's the count of the number of buffers in the chain.
 * \param buffer_id descriptor buffer ID.
 * \param length device write length. Specify the length of the buffer that has been initialized
 * (written to) by the device
 * \param inflight_head the head idx of this IO inflight desc chain.
 */
352 void vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
353 				  struct spdk_vhost_virtqueue *virtqueue,
354 				  uint16_t num_descs, uint16_t buffer_id,
355 				  uint32_t length, uint16_t inflight_head);
356 
357 /**
358  * Get subsequent descriptor from given table.
359  * \param desc current descriptor, will be set to the
360  * next descriptor (NULL in case this is the last
361  * descriptor in the chain or the next desc is invalid)
362  * \param desc_table descriptor table
363  * \param desc_table_size size of the *desc_table*
364  * \return 0 on success, -1 if given index is invalid
365  * The *desc* param will be set regardless of the
366  * return value.
367  */
368 int vhost_vring_desc_get_next(struct vring_desc **desc,
369 			      struct vring_desc *desc_table, uint32_t desc_table_size);
370 static inline bool
371 vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
372 {
373 	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
374 }
375 
376 int vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
377 			    uint16_t *iov_index, const struct vring_desc *desc);
378 
379 bool vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue);
380 
/**
 * Get subsequent descriptor from vq or desc table.
 * \param desc current descriptor, will be set to the
 * next descriptor (NULL in case this is the last
 * descriptor in the chain or the next desc is invalid)
 * \param req_idx index of current desc, will be set to the next
 * index. If desc_table != NULL, req_idx is the desc_table index;
 * otherwise it is the vring index.
 * \param desc_table descriptor table
 * \param desc_table_size size of the *desc_table*
 * \return 0 on success, -1 if given index is invalid
 * The *desc* param will be set regardless of the
 * return value.
 */
395 int vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
396 				     struct spdk_vhost_virtqueue *vq,
397 				     struct vring_packed_desc *desc_table,
398 				     uint32_t desc_table_size);
399 
400 bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc);
401 
402 int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
403 				   uint16_t *iov_index, const struct vring_packed_desc *desc);
404 
405 bool vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc);
406 
407 int vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
408 				     uint16_t *iov_index, const spdk_vhost_inflight_desc *desc);
409 
410 uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
411 		uint16_t *num_descs);
412 
413 static inline bool __attribute__((always_inline))
414 vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id)
415 {
416 	return vsession->negotiated_features & (1ULL << feature_id);
417 }
418 
419 int vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
420 		       const struct spdk_vhost_dev_backend *backend,
421 		       const struct spdk_vhost_user_dev_backend *user_backend);
422 int vhost_dev_unregister(struct spdk_vhost_dev *vdev);
423 
424 void vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
425 
426 /*
427  * Set vhost session to run in interrupt or poll mode
428  */
429 void vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession,
430 		bool interrupt_mode);
431 
432 /*
433  * Memory registration functions used in start/stop device callbacks
434  */
435 void vhost_session_mem_register(struct rte_vhost_memory *mem);
436 void vhost_session_mem_unregister(struct rte_vhost_memory *mem);
437 
438 /*
439  * Call a function for each session of the provided vhost device.
440  * The function will be called one-by-one on each session's thread.
441  *
442  * \param vdev vhost device
443  * \param fn function to call on each session's thread
444  * \param cpl_fn function to be called at the end of the iteration on
445  * the vhost management thread.
446  * Optional, can be NULL.
447  * \param arg additional argument to the both callbacks
448  */
449 void vhost_user_dev_foreach_session(struct spdk_vhost_dev *dev,
450 				    spdk_vhost_session_fn fn,
451 				    spdk_vhost_dev_fn cpl_fn,
452 				    void *arg);
453 
454 /**
455  * Call a function on the provided lcore and block until either
456  * vhost_user_session_start_done() or vhost_user_session_stop_done()
457  * is called.
458  *
459  * This must be called under the global vhost mutex, which this function
460  * will unlock for the time it's waiting. It's meant to be called only
461  * from start/stop session callbacks.
462  *
463  * \param vsession vhost session
464  * \param cb_fn the function to call. The void *arg parameter in cb_fn
465  * is always NULL.
466  * \param timeout_sec timeout in seconds. This function will still
467  * block after the timeout expires, but will print the provided errmsg.
468  * \param errmsg error message to print once the timeout expires
469  * \return return the code passed to spdk_vhost_session_event_done().
470  */
471 int vhost_user_session_send_event(struct spdk_vhost_session *vsession,
472 				  spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
473 				  const char *errmsg);
474 
475 /**
476  * Finish a blocking spdk_vhost_user_session_send_event() call and finally
477  * start the session. This must be called on the target lcore, which
478  * will now receive all session-related messages (e.g. from
479  * vhost_user_dev_foreach_session()).
480  *
481  * Must be called under the global vhost lock.
482  *
483  * \param vsession vhost session
484  * \param response return code
485  */
486 void vhost_user_session_start_done(struct spdk_vhost_session *vsession, int response);
487 
488 /**
489  * Finish a blocking spdk_vhost_user_session_send_event() call and finally
490  * stop the session. This must be called on the session's lcore which
491  * used to receive all session-related messages (e.g. from
492  * vhost_user_dev_foreach_session()). After this call, the session-
493  * related messages will be once again processed by any arbitrary thread.
494  *
 * Must be called under the global vhost mutex.
498  *
499  * \param vsession vhost session
500  * \param response return code
501  */
502 void vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response);
503 
504 struct spdk_vhost_session *vhost_session_find_by_vid(int vid);
505 void vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession);
506 int vhost_register_unix_socket(const char *path, const char *ctrl_name,
507 			       uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features);
508 int vhost_driver_unregister(const char *path);
509 int vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
510 int vhost_get_negotiated_features(int vid, uint64_t *negotiated_features);
511 
512 int remove_vhost_controller(struct spdk_vhost_dev *vdev);
513 
514 struct spdk_io_channel *vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev);
515 void vhost_blk_put_io_channel(struct spdk_io_channel *ch);
516 
/* Function calls from vhost.c to rte_vhost_user.c,
 * shall be removed once virtio transport abstraction is complete. */
519 int vhost_user_session_set_coalescing(struct spdk_vhost_dev *dev,
520 				      struct spdk_vhost_session *vsession, void *ctx);
521 int vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us,
522 				  uint32_t iops_threshold);
523 int vhost_user_dev_register(struct spdk_vhost_dev *vdev, const char *name,
524 			    struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend);
525 int vhost_user_dev_unregister(struct spdk_vhost_dev *vdev);
526 int vhost_user_init(void);
527 typedef void (*vhost_fini_cb)(void *ctx);
528 void vhost_user_fini(vhost_fini_cb vhost_cb);
529 
/* Per-request state for a vhost-blk I/O. */
struct spdk_vhost_blk_task {
	/* Underlying bdev I/O for this request. */
	struct spdk_bdev_io *bdev_io;

	/* Virtio-blk status byte; volatile because it is shared with the
	 * guest — presumably points into guest memory (TODO: confirm in
	 * vhost_blk request-parsing code).
	 */
	volatile uint8_t *status;

	/* for io wait */
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	struct spdk_io_channel *bdev_io_wait_ch;
	struct spdk_vhost_dev *bdev_io_wait_vdev;

	/** Number of bytes that were written. */
	uint32_t used_len;
	/* Number of valid entries in iovs[]. */
	uint16_t iovcnt;
	struct iovec iovs[SPDK_VHOST_IOVS_MAX];

	/** Size of whole payload in bytes */
	uint32_t payload_size;
};
548 
549 #endif /* SPDK_VHOST_INTERNAL_H */
550