xref: /spdk/lib/vhost/vhost_internal.h (revision 407e88fd2ab020d753e33014cf759353a9901b51)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_VHOST_INTERNAL_H
35 #define SPDK_VHOST_INTERNAL_H
36 
37 #include "spdk/stdinc.h"
38 
39 #include <rte_vhost.h>
40 
41 #include "spdk_internal/log.h"
42 #include "spdk/event.h"
43 #include "spdk/rpc.h"
44 #include "spdk/config.h"
45 
46 #define SPDK_CACHE_LINE_SIZE RTE_CACHE_LINE_SIZE	/* used in this header to align struct spdk_vhost_virtqueue */
47 /* Fallback feature-bit numbers: each one is defined here only if the included headers did not already define it. */
48 #ifndef VHOST_USER_F_PROTOCOL_FEATURES
49 #define VHOST_USER_F_PROTOCOL_FEATURES	30
50 #endif
51 
52 #ifndef VIRTIO_F_VERSION_1
53 #define VIRTIO_F_VERSION_1 32
54 #endif
55 
56 #ifndef VIRTIO_BLK_F_MQ
57 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
58 #endif
59 
60 #ifndef VIRTIO_BLK_F_CONFIG_WCE
61 #define VIRTIO_BLK_F_CONFIG_WCE	11
62 #endif
63 /* Fixed limits. Only SPDK_VHOST_MAX_VQUEUES is referenced in this header; the others are presumably used by the .c files - confirm. */
64 #define SPDK_VHOST_MAX_VQUEUES	256	/* number of entries in spdk_vhost_session::virtqueue[] */
65 #define SPDK_VHOST_MAX_VQ_SIZE	1024
66 
67 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8
68 
69 #define SPDK_VHOST_IOVS_MAX 129
70 
71 /*
72  * Interval, in milliseconds (see the _MS suffix), at which stats are checked for interrupt coalescing.
73  */
74 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10
75 /*
76  * Default per-virtqueue I/O rate threshold (IOPS, per the macro name) at which interrupts start to be coalesced.
77  */
78 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000
79 
80 /*
81  * Base coalescing delay in microseconds (see the _US suffix). Coalescing is currently not used by default.
82  * Setting this to a value > 0 here or by RPC will enable coalescing.
83  */
84 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0
85 
86 /* Bitmask (one bit per feature number) of the generic features listed below. NOTE(review): presumably OR-ed into each backend's virtio_features - confirm at the point of use. */
87 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
88 	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
89 	(1ULL << VIRTIO_F_VERSION_1) | \
90 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
91 	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
92 	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
93 /* Both bits in this mask are also set in SPDK_VHOST_FEATURES. NOTE(review): presumably they are masked out again when features are offered - confirm at the point of use. */
94 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
95 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
96 
97 struct vhost_poll_group;	/* forward declaration; this header only uses pointers to it */
98 /* Per-virtqueue state. spdk_vhost_session embeds an array of these; the struct is aligned to SPDK_CACHE_LINE_SIZE. */
99 struct spdk_vhost_virtqueue {
100 	struct rte_vhost_vring vring;	/* rte_vhost vring structure for this queue */
101 	uint16_t last_avail_idx;	/* NOTE(review): presumably the last avail ring index consumed - confirm */
102 	uint16_t last_used_idx;	/* NOTE(review): presumably the last used ring index written - confirm */
103 
104 	void *tasks;	/* untyped pointer; NOTE(review): presumably backend-specific per-queue task storage - confirm */
105 
106 	/* Request count since the last stats check */
107 	uint32_t req_cnt;
108 
109 	/* Request count since the last event */
110 	uint16_t used_req_cnt;
111 
112 	/* How long the interrupt is delayed */
113 	uint32_t irq_delay_time;
114 
115 	/* Next time when we need to send an event */
116 	uint64_t next_event_time;
117 
118 	/* Associated vhost_virtqueue in the virtio device's virtqueue list */
119 	uint32_t vring_idx;
120 } __attribute((aligned(SPDK_CACHE_LINE_SIZE)));
121 /* State of one connection (session) to a vhost device, including its array of SPDK_VHOST_MAX_VQUEUES virtqueues. */
122 struct spdk_vhost_session {
123 	struct spdk_vhost_dev *vdev;	/* NOTE(review): presumably the device this session is connected to - confirm */
124 
125 	/* rte_vhost connection ID. */
126 	int vid;
127 
128 	/* Unique session ID. */
129 	uint64_t id;
130 	/* Unique session name. */
131 	char *name;
132 
133 	struct vhost_poll_group *poll_group;	/* opaque in this header; see spdk_vhost_get_poll_group() */
134 
135 	bool initialized;
136 	bool started;
137 	bool needs_restart;
138 	bool forced_polling;
139 
140 	struct rte_vhost_memory *mem;	/* NOTE(review): presumably the guest memory table obtained from rte_vhost - confirm */
141 
142 	int task_cnt;
143 
144 	uint16_t max_queues;
145 
146 	uint64_t negotiated_features;	/* feature bitmask tested by spdk_vhost_dev_has_feature() */
147 
148 	/* Local copy of device coalescing settings. */
149 	uint32_t coalescing_delay_time_base;
150 	uint32_t coalescing_io_rate_threshold;
151 
152 	/* Next time when stats for event coalescing will be checked. */
153 	uint64_t next_stats_check_time;
154 
155 	/* Interval used for event coalescing checking. */
156 	uint64_t stats_check_interval;
157 
158 	struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES];
159 
160 	TAILQ_ENTRY(spdk_vhost_session) tailq;	/* list linkage; NOTE(review): presumably for spdk_vhost_dev::vsessions - confirm */
161 };
162 /* A vhost device: identity, backend operations, coalescing settings and the list of sessions connected to it. */
163 struct spdk_vhost_dev {
164 	char *name;
165 	char *path;
166 
167 	struct spdk_cpuset *cpumask;
168 	bool registered;
169 
170 	const struct spdk_vhost_dev_backend *backend;	/* backend operations table (see struct spdk_vhost_dev_backend) */
171 
172 	/* Saved original values used to set up coalescing to avoid integer
173 	 * rounding issues during save/load config.
174 	 */
175 	uint32_t coalescing_delay_us;
176 	uint32_t coalescing_iops_threshold;
177 
178 	/* Current connections to the device */
179 	TAILQ_HEAD(, spdk_vhost_session) vsessions;
180 
181 	/* Increment-only session counter */
182 	uint64_t vsessions_num;
183 
184 	/* Number of started and actively polled sessions */
185 	uint32_t active_session_num;
186 
187 	/* Number of pending asynchronous operations */
188 	uint32_t pending_async_op_num;
189 
190 	TAILQ_ENTRY(spdk_vhost_dev) tailq;	/* list linkage; the list head is not declared in this header */
191 };
192 
193 /**
194  * Synchronized vhost session event used for backend callbacks.
195  * This is the callback type accepted by spdk_vhost_dev_foreach_session() and spdk_vhost_session_send_event().
196  * \param vdev vhost device. If the device has been deleted
197  * in the meantime, this function will be called one last
198  * time with vdev == NULL.
199  * \param vsession vhost session. If all sessions have been
200  * iterated through, this function will be called one last
201  * time with vsession == NULL.
202  * \param arg user-provided parameter.
203  *
204  * \return a negative value breaks the foreach call, meaning
205  * the function won't be called again. Zero and positive
206  * return codes don't have any effect.
207  */
208 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev,
209 				     struct spdk_vhost_session *vsession,
210 				     void *arg);
211 /* Backend operations table; a device refers to its backend through spdk_vhost_dev::backend. */
212 struct spdk_vhost_dev_backend {
213 	uint64_t virtio_features;	/* feature bitmask; NOTE(review): presumably the features this backend offers - confirm */
214 	uint64_t disabled_features;	/* feature bitmask; NOTE(review): presumably features this backend turns off - confirm */
215 
216 	/**
217 	 * Size of additional per-session context data
218 	 * allocated whenever a new client connects.
219 	 */
220 	size_t session_ctx_size;
221 	/* Session start/stop callbacks. The docs in this header say spdk_vhost_session_send_event() is meant to be called only from start/stop session callbacks. */
222 	int (*start_session)(struct spdk_vhost_session *vsession);
223 	int (*stop_session)(struct spdk_vhost_session *vsession);
224 	/* Device config access. NOTE(review): presumably the virtio device config space; return-code convention is not visible here - confirm. */
225 	int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len);
226 	int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config,
227 				uint32_t offset, uint32_t size, uint32_t flags);
228 	/* JSON output callbacks (both write through the given spdk_json_write_ctx) and device removal. */
229 	void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
230 	void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
231 	int (*remove_device)(struct spdk_vhost_dev *vdev);
232 };
233 /* NOTE(review): judging by the name, translates the guest physical range (addr, len) of the session into a vhost virtual address; failure value is not visible here - confirm against the definition. */
234 void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len);
235 /* NOTE(review): presumably copies up to reqs_len request indices from the avail ring of vq into reqs and returns how many were stored - confirm against the definition. */
236 uint16_t spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs,
237 				      uint16_t reqs_len);
238 
239 /**
240  * Get a virtio descriptor at given index in given virtqueue.
241  * The descriptor will provide access to the entire descriptor
242  * chain. The subsequent descriptors are accessible via
243  * \c spdk_vhost_vring_desc_get_next.
244  * \param vsession vhost session
245  * \param vq virtqueue
246  * \param req_idx descriptor index
247  * \param desc pointer to be set to the descriptor
248  * \param desc_table descriptor table to be used with
249  * \c spdk_vhost_vring_desc_get_next. This might be either
250  * default virtqueue descriptor table or per-chain indirect
251  * table.
252  * \param desc_table_size size of the *desc_table*
253  * \return 0 on success, -1 if given index is invalid.
254  * If -1 is returned, the content of the output params is undefined.
255  */
256 int spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq,
257 			   uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
258 			   uint32_t *desc_table_size);
259 
260 /**
261  * Send an IRQ/call to the client for \c vq, if one is pending.
262  * \param vsession vhost session
263  * \param vq virtqueue
264  * \return
265  *   0 - if no interrupt was signalled
266  *   1 - if an interrupt was signalled
267  */
268 int spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq);
269 
270 
271 /**
272  * Send IRQs for all queues of the session that need
273  * to be signaled.
274  * \param vsession vhost session
275  */
276 void spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession);
277 /* NOTE(review): presumably places an entry (id, len) on the used ring of vq, following the virtio used-ring layout - confirm against the definition. */
278 void spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
279 				     struct spdk_vhost_virtqueue *vq,
280 				     uint16_t id, uint32_t len);
281 
282 /**
283  * Get subsequent descriptor from given table.
284  * \param desc current descriptor, will be set to the
285  * next descriptor (NULL in case this is the last
286  * descriptor in the chain or the next desc is invalid)
287  * \param desc_table descriptor table
288  * \param desc_table_size size of the *desc_table*
289  * \return 0 on success, -1 if given index is invalid.
290  * The *desc* param will be set regardless of the
291  * return value.
292  */
293 int spdk_vhost_vring_desc_get_next(struct vring_desc **desc,
294 				   struct vring_desc *desc_table, uint32_t desc_table_size);
295 bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc);	/* NOTE(review): presumably true when the descriptor is marked writable - confirm */
296 /* NOTE(review): presumably maps the buffer of desc into entries of iov starting at *iov_index and advances *iov_index; error convention not visible here - confirm against the definition. */
297 int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
298 				 uint16_t *iov_index, const struct vring_desc *desc);
299 /* Return true if bit number feature_id is set in the session's negotiated_features mask; feature_id must be below 64. */
300 static inline bool __attribute__((always_inline))
301 spdk_vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id)
302 {
303 	return ((vsession->negotiated_features >> feature_id) & 1ULL) != 0;
304 }
305 /* Device registration. NOTE(review): mask_str is presumably a CPU mask string for the device and the return value presumably 0 on success - confirm against the definitions. */
306 int spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
307 			    const struct spdk_vhost_dev_backend *backend);
308 int spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev);
309 /* NOTE(review): the *_controller_construct() functions presumably create controllers from the application configuration - confirm. */
310 int spdk_vhost_scsi_controller_construct(void);
311 int spdk_vhost_blk_controller_construct(void);
312 void spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
313 
314 /**
315  * Call function for each active session on the provided
316  * vhost device. The function will be called one-by-one
317  * on each session's thread.
318  *
319  * \param dev vhost device
320  * \param fn function to call
321  * \param arg additional argument to \c fn
322  */
323 void spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *dev,
324 				    spdk_vhost_session_fn fn, void *arg);
325 
326 /**
327  * Call a function on the provided lcore and block until either
328  * spdk_vhost_session_start_done() or spdk_vhost_session_stop_done()
329  * is called.
330  *
331  * This must be called under the global vhost mutex, which this function
332  * will unlock for the time it's waiting. It's meant to be called only
333  * from start/stop session callbacks.
334  *
335  * \param pg designated session's poll group
336  * \param vsession vhost session
337  * \param cb_fn the function to call. The void *arg parameter in cb_fn
338  * is always NULL.
339  * \param timeout_sec timeout in seconds. This function will still
340  * block after the timeout expires, but will print the provided errmsg.
341  * \param errmsg error message to print once the timeout expires
342  * \return the code passed to spdk_vhost_session_start_done() or spdk_vhost_session_stop_done().
343  */
344 int spdk_vhost_session_send_event(struct vhost_poll_group *pg,
345 				  struct spdk_vhost_session *vsession,
346 				  spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
347 				  const char *errmsg);
348 
349 /**
350  * Finish a blocking spdk_vhost_session_send_event() call and finally
351  * start the session. This must be called on the target lcore, which
352  * will now receive all session-related messages (e.g. from
353  * spdk_vhost_dev_foreach_session()).
354  *
355  * Must be called under the global vhost lock.
356  *
357  * \param vsession vhost session
358  * \param response return code, handed back by the spdk_vhost_session_send_event() call being finished
359  */
360 void spdk_vhost_session_start_done(struct spdk_vhost_session *vsession, int response);
361 
362 /**
363  * Finish a blocking spdk_vhost_session_send_event() call and finally
364  * stop the session. This must be called on the session's lcore which
365  * used to receive all session-related messages (e.g. from
366  * spdk_vhost_dev_foreach_session()). After this call, the session-
367  * related messages will be once again processed by any arbitrary thread.
368  *
369  * Must be called under the global vhost lock.
370  *
371  * \param vsession vhost session
372  * \param response return code, which becomes the return value
373  * of the spdk_vhost_session_send_event() call that is being
374  * finished
375  */
376 void spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession, int response);
377 /* Session lookup by rte_vhost connection ID (see spdk_vhost_session::vid). NOTE(review): presumably returns NULL when no session matches - confirm. */
378 struct spdk_vhost_session *spdk_vhost_session_find_by_vid(int vid);
379 void spdk_vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession);
380 void spdk_vhost_dev_install_rte_compat_hooks(struct spdk_vhost_dev *vdev);
381 /* Poll group acquire/release pair. NOTE(review): presumably a group obtained with get must be released with put - confirm the ownership rules in the definition. */
382 struct vhost_poll_group *spdk_vhost_get_poll_group(struct spdk_cpuset *cpumask);
383 void spdk_vhost_put_poll_group(struct vhost_poll_group *pg);
384 /* NOTE(review): presumably removes the given vhost device via its backend; return convention not visible here - confirm. */
385 int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev);
386 /* vhost-nvme entry points; declared only when SPDK_CONFIG_VHOST_INTERNAL_LIB is defined. Functions taking vid identify the connection by an integer ID. */
387 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
388 int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf);
389 int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd);
390 int spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size);
391 int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap);
392 int spdk_vhost_nvme_controller_construct(void);
393 int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues);
394 int spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev);
395 int spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev,
396 			       const char *bdev_name);
397 #endif /* SPDK_CONFIG_VHOST_INTERNAL_LIB */
398 
399 #endif /* SPDK_VHOST_INTERNAL_H */
400