xref: /spdk/lib/vhost/vhost_internal.h (revision 367c980b453f48310e52d2574afe7d2774df800c)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_VHOST_INTERNAL_H
35 #define SPDK_VHOST_INTERNAL_H
36 #include <linux/virtio_config.h>
37 
38 #include "spdk/stdinc.h"
39 
40 #include <rte_vhost.h>
41 
42 #include "spdk_internal/vhost_user.h"
43 #include "spdk_internal/log.h"
44 #include "spdk/util.h"
45 #include "spdk/rpc.h"
46 #include "spdk/config.h"
47 
48 #define SPDK_VHOST_MAX_VQUEUES	256
49 #define SPDK_VHOST_MAX_VQ_SIZE	1024
50 
51 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8
52 
53 #define SPDK_VHOST_IOVS_MAX 129
54 
55 #define SPDK_VHOST_VQ_MAX_SUBMISSIONS	32
56 
57 /*
58  * Rate at which stats are checked for interrupt coalescing.
59  */
60 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10
61 /*
62  * Default threshold at which interrupts start to be coalesced.
63  */
64 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000
65 
66 /*
67  * Currently coalescing is not used by default.
68  * Setting this to value > 0 here or by RPC will enable coalescing.
69  */
70 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0
71 
72 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
73 	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
74 	(1ULL << VIRTIO_F_VERSION_1) | \
75 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
76 	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
77 	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
78 	(1ULL << VIRTIO_F_RING_PACKED))
79 
80 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
81 	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
82 
83 #define VRING_DESC_F_AVAIL	(1ULL << VRING_PACKED_DESC_F_AVAIL)
84 #define VRING_DESC_F_USED	(1ULL << VRING_PACKED_DESC_F_USED)
85 #define VRING_DESC_F_AVAIL_USED	(VRING_DESC_F_AVAIL | VRING_DESC_F_USED)
86 
87 typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc;
88 typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info;
89 
90 struct spdk_vhost_virtqueue {
91 	struct rte_vhost_vring vring;
92 	struct rte_vhost_ring_inflight vring_inflight;
93 	uint16_t last_avail_idx;
94 	uint16_t last_used_idx;
95 
96 	struct {
97 		/* To mark a descriptor as available in packed ring
98 		 * Equal to avail_wrap_counter in spec.
99 		 */
100 		uint8_t avail_phase	: 1;
101 		/* To mark a descriptor as used in packed ring
102 		 * Equal to used_wrap_counter in spec.
103 		 */
104 		uint8_t used_phase	: 1;
105 		uint8_t padding		: 5;
106 		bool packed_ring	: 1;
107 	} packed;
108 
109 	void *tasks;
110 
111 	/* Request count from last stats check */
112 	uint32_t req_cnt;
113 
114 	/* Request count from last event */
115 	uint16_t used_req_cnt;
116 
117 	/* How long interrupt is delayed */
118 	uint32_t irq_delay_time;
119 
120 	/* Next time when we need to send event */
121 	uint64_t next_event_time;
122 
123 	/* Associated vhost_virtqueue in the virtio device's virtqueue list */
124 	uint32_t vring_idx;
125 } __attribute((aligned(SPDK_CACHE_LINE_SIZE)));
126 
/* State of a single vhost-user connection (one guest attachment)
 * to a vhost device.
 */
struct spdk_vhost_session {
	/* Device this connection belongs to. */
	struct spdk_vhost_dev *vdev;

	/* rte_vhost connection ID. */
	int vid;

	/* Unique session ID. */
	uint64_t id;
	/* Unique session name. */
	char *name;

	/* NOTE(review): lifecycle flags; initialized/started appear to be
	 * set during the start/stop callbacks -- confirm in vhost.c.
	 */
	bool initialized;
	bool started;
	bool needs_restart;
	bool forced_polling;

	/* Guest memory table for this connection. */
	struct rte_vhost_memory *mem;

	/* Number of in-flight tasks; presumably must reach 0 before the
	 * session can be fully stopped -- TODO confirm.
	 */
	int task_cnt;

	/* Number of usable virtqueues (<= SPDK_VHOST_MAX_VQUEUES). */
	uint16_t max_queues;

	/* Virtio feature bits negotiated with the guest driver. */
	uint64_t negotiated_features;

	/* Local copy of device coalescing settings. */
	uint32_t coalescing_delay_time_base;
	uint32_t coalescing_io_rate_threshold;

	/* Next time when stats for event coalescing will be checked. */
	uint64_t next_stats_check_time;

	/* Interval used for event coalescing checking. */
	uint64_t stats_check_interval;

	struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES];

	/* Entry in the parent device's vsessions list. */
	TAILQ_ENTRY(spdk_vhost_session) tailq;
};
165 
/* A vhost device (controller) exposed over a vhost-user socket.
 * One device may serve multiple concurrent sessions.
 */
struct spdk_vhost_dev {
	char *name;
	/* Presumably the vhost-user domain socket path (see
	 * vhost_register_unix_socket) -- TODO confirm.
	 */
	char *path;

	struct spdk_thread *thread;
	bool registered;

	/* Feature sets passed to rte_vhost at registration time. */
	uint64_t virtio_features;
	uint64_t disabled_features;
	uint64_t protocol_features;

	/* Backend (e.g. scsi/blk) implementing device-type specifics. */
	const struct spdk_vhost_dev_backend *backend;

	/* Saved original values used to setup coalescing to avoid integer
	 * rounding issues during save/load config.
	 */
	uint32_t coalescing_delay_us;
	uint32_t coalescing_iops_threshold;

	/* Current connections to the device */
	TAILQ_HEAD(, spdk_vhost_session) vsessions;

	/* Increment-only session counter */
	uint64_t vsessions_num;

	/* Number of started and actively polled sessions */
	uint32_t active_session_num;

	/* Number of pending asynchronous operations */
	uint32_t pending_async_op_num;

	/* Entry in the global device list. */
	TAILQ_ENTRY(spdk_vhost_dev) tailq;
};
199 
200 /**
201  * \param vdev vhost device.
202  * \param vsession vhost session.
203  * \param arg user-provided parameter.
204  *
205  * \return negative values will break the foreach call, meaning
206  * the function won't be called again. Return codes zero and
207  * positive don't have any effect.
208  */
209 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev,
210 				     struct spdk_vhost_session *vsession,
211 				     void *arg);
212 
213 /**
214  * \param vdev vhost device.
215  * \param arg user-provided parameter.
216  */
217 typedef void (*spdk_vhost_dev_fn)(struct spdk_vhost_dev *vdev, void *arg);
218 
/* Callback table a device-type backend (e.g. vhost-scsi, vhost-blk)
 * provides to the generic vhost layer.
 */
struct spdk_vhost_dev_backend {
	/**
	 * Size of additional per-session context data
	 * allocated whenever a new client connects.
	 */
	size_t session_ctx_size;

	/* Start/stop polling the given session on its thread. */
	int (*start_session)(struct spdk_vhost_session *vsession);
	int (*stop_session)(struct spdk_vhost_session *vsession);

	/* Read/write the virtio device config space; presumably backing
	 * vhost-user GET_CONFIG/SET_CONFIG -- TODO confirm with vhost.c.
	 */
	int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len);
	int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config,
				uint32_t offset, uint32_t size, uint32_t flags);

	/* Emit backend-specific runtime info / replayable config as JSON. */
	void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
	void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
	/* Tear down the device. */
	int (*remove_device)(struct spdk_vhost_dev *vdev);
};
237 
238 void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len);
239 
240 uint16_t vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs,
241 				 uint16_t reqs_len);
242 
243 /**
244  * Get a virtio split descriptor at given index in given virtqueue.
245  * The descriptor will provide access to the entire descriptor
 246  * chain. The subsequent descriptors are accessible via
247  * \c spdk_vhost_vring_desc_get_next.
248  * \param vsession vhost session
249  * \param vq virtqueue
250  * \param req_idx descriptor index
251  * \param desc pointer to be set to the descriptor
252  * \param desc_table descriptor table to be used with
253  * \c spdk_vhost_vring_desc_get_next. This might be either
254  * default virtqueue descriptor table or per-chain indirect
255  * table.
256  * \param desc_table_size size of the *desc_table*
257  * \return 0 on success, -1 if given index is invalid.
258  * If -1 is returned, the content of params is undefined.
259  */
260 int vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq,
261 		      uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
262 		      uint32_t *desc_table_size);
263 
264 /**
265  * Get a virtio packed descriptor at given index in given virtqueue.
266  * The descriptor will provide access to the entire descriptor
 267  * chain. The subsequent descriptors are accessible via
268  * \c vhost_vring_packed_desc_get_next.
269  * \param vsession vhost session
270  * \param vq virtqueue
271  * \param req_idx descriptor index
272  * \param desc pointer to be set to the descriptor
273  * \param desc_table descriptor table to be used with
274  * \c spdk_vhost_vring_desc_get_next. This might be either
275  * \c NULL or per-chain indirect table.
276  * \param desc_table_size size of the *desc_table*
277  * \return 0 on success, -1 if given index is invalid.
278  * If -1 is returned, the content of params is undefined.
279  */
280 int vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
281 			     struct spdk_vhost_virtqueue *virtqueue,
282 			     uint16_t req_idx, struct vring_packed_desc **desc,
283 			     struct vring_packed_desc **desc_table, uint32_t *desc_table_size);
284 
285 /**
286  * Send IRQ/call client (if pending) for \c vq.
287  * \param vsession vhost session
288  * \param vq virtqueue
289  * \return
290  *   0 - if no interrupt was signalled
291  *   1 - if interrupt was signalled
292  */
293 int vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq);
294 
295 
 296 /**
 297  * Send IRQs for all queues of the session that need to be signaled.
 298  *
 299  * \param vsession vhost session
 300  */
301 void vhost_session_used_signal(struct spdk_vhost_session *vsession);
302 
303 void vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
304 				struct spdk_vhost_virtqueue *vq,
305 				uint16_t id, uint32_t len);
306 
 307 /**
 308  * Enqueue the entry to the used ring when the device completes the request.
 309  * \param vsession vhost session
 310  * \param virtqueue virtqueue
 311  * \param num_descs descriptor count. It's the count of the number of buffers
 312  * in the completed descriptor chain.
 313  * \param buffer_id descriptor buffer ID.
 314  * \param length device write length. Specify the length of the buffer that has been initialized
 315  * (written to) by the device
 316  */
317 void vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
318 				  struct spdk_vhost_virtqueue *virtqueue,
319 				  uint16_t num_descs, uint16_t buffer_id,
320 				  uint32_t length);
321 
322 /**
323  * Get subsequent descriptor from given table.
324  * \param desc current descriptor, will be set to the
325  * next descriptor (NULL in case this is the last
326  * descriptor in the chain or the next desc is invalid)
327  * \param desc_table descriptor table
328  * \param desc_table_size size of the *desc_table*
329  * \return 0 on success, -1 if given index is invalid
330  * The *desc* param will be set regardless of the
331  * return value.
332  */
333 int vhost_vring_desc_get_next(struct vring_desc **desc,
334 			      struct vring_desc *desc_table, uint32_t desc_table_size);
/* Return true when the split-ring descriptor is device-writable,
 * i.e. its VRING_DESC_F_WRITE flag is set.
 */
static inline bool
vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
340 
341 int vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
342 			    uint16_t *iov_index, const struct vring_desc *desc);
343 
344 bool vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue);
345 
346 /**
347  * Get subsequent descriptor from vq or desc table.
348  * \param desc current descriptor, will be set to the
349  * next descriptor (NULL in case this is the last
350  * descriptor in the chain or the next desc is invalid)
 351  * \param req_idx index of the current desc, will be set to the next
 352  * index. If desc_table is NULL, req_idx is an index into the vring;
 353  * otherwise it is an index into desc_table.
354  * \param desc_table descriptor table
355  * \param desc_table_size size of the *desc_table*
356  * \return 0 on success, -1 if given index is invalid
357  * The *desc* param will be set regardless of the
358  * return value.
359  */
360 int vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
361 				     struct spdk_vhost_virtqueue *vq,
362 				     struct vring_packed_desc *desc_table,
363 				     uint32_t desc_table_size);
364 
365 bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc);
366 
367 int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
368 				   uint16_t *iov_index, const struct vring_packed_desc *desc);
369 
370 uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
371 		uint16_t *num_descs);
372 
373 static inline bool __attribute__((always_inline))
374 vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id)
375 {
376 	return vsession->negotiated_features & (1ULL << feature_id);
377 }
378 
379 int vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
380 		       const struct spdk_vhost_dev_backend *backend);
381 int vhost_dev_unregister(struct spdk_vhost_dev *vdev);
382 
383 int vhost_scsi_controller_construct(void);
384 int vhost_blk_controller_construct(void);
385 void vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w);
386 
387 /*
388  * Vhost callbacks for vhost_device_ops interface
389  */
390 
391 int vhost_new_connection_cb(int vid, const char *ifname);
392 int vhost_start_device_cb(int vid);
393 int vhost_stop_device_cb(int vid);
394 int vhost_destroy_connection_cb(int vid);
395 
396 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
397 int vhost_get_config_cb(int vid, uint8_t *config, uint32_t len);
398 int vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset,
399 			uint32_t size, uint32_t flags);
400 #endif
401 
402 /*
403  * Memory registration functions used in start/stop device callbacks
404  */
405 void vhost_session_mem_register(struct rte_vhost_memory *mem);
406 void vhost_session_mem_unregister(struct rte_vhost_memory *mem);
407 
408 /*
409  * Call a function for each session of the provided vhost device.
410  * The function will be called one-by-one on each session's thread.
411  *
412  * \param vdev vhost device
413  * \param fn function to call on each session's thread
414  * \param cpl_fn function to be called at the end of the iteration on
415  * the vhost management thread.
416  * Optional, can be NULL.
417  * \param arg additional argument to the both callbacks
418  */
419 void vhost_dev_foreach_session(struct spdk_vhost_dev *dev,
420 			       spdk_vhost_session_fn fn,
421 			       spdk_vhost_dev_fn cpl_fn,
422 			       void *arg);
423 
424 /**
425  * Call a function on the provided lcore and block until either
426  * spdk_vhost_session_start_done() or spdk_vhost_session_stop_done()
427  * is called.
428  *
429  * This must be called under the global vhost mutex, which this function
430  * will unlock for the time it's waiting. It's meant to be called only
431  * from start/stop session callbacks.
432  *
433  * \param vsession vhost session
434  * \param cb_fn the function to call. The void *arg parameter in cb_fn
435  * is always NULL.
436  * \param timeout_sec timeout in seconds. This function will still
437  * block after the timeout expires, but will print the provided errmsg.
438  * \param errmsg error message to print once the timeout expires
 439  * \return the code passed to vhost_session_start_done() or vhost_session_stop_done().
440  */
441 int vhost_session_send_event(struct spdk_vhost_session *vsession,
442 			     spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
443 			     const char *errmsg);
444 
445 /**
446  * Finish a blocking spdk_vhost_session_send_event() call and finally
447  * start the session. This must be called on the target lcore, which
448  * will now receive all session-related messages (e.g. from
449  * spdk_vhost_dev_foreach_session()).
450  *
451  * Must be called under the global vhost lock.
452  *
453  * \param vsession vhost session
454  * \param response return code
455  */
456 void vhost_session_start_done(struct spdk_vhost_session *vsession, int response);
457 
458 /**
459  * Finish a blocking spdk_vhost_session_send_event() call and finally
460  * stop the session. This must be called on the session's lcore which
461  * used to receive all session-related messages (e.g. from
462  * spdk_vhost_dev_foreach_session()). After this call, the session-
463  * related messages will be once again processed by any arbitrary thread.
464  *
465  * Must be called under the global vhost lock.
466  *
467  * Must be called under the global vhost mutex.
468  *
469  * \param vsession vhost session
470  * \param response return code
471  */
472 void vhost_session_stop_done(struct spdk_vhost_session *vsession, int response);
473 
474 struct spdk_vhost_session *vhost_session_find_by_vid(int vid);
475 void vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession);
476 int vhost_register_unix_socket(const char *path, const char *ctrl_name,
477 			       uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features);
478 int vhost_driver_unregister(const char *path);
479 int vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
480 int vhost_get_negotiated_features(int vid, uint64_t *negotiated_features);
481 
482 int remove_vhost_controller(struct spdk_vhost_dev *vdev);
483 
484 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
485 int vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf);
486 int vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd);
487 int vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size);
488 int vhost_nvme_get_cap(int vid, uint64_t *cap);
489 int vhost_nvme_controller_construct(void);
490 int vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues);
491 int vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev);
492 int vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev,
493 			  const char *bdev_name);
494 #endif
495 
496 #endif /* SPDK_VHOST_INTERNAL_H */
497