1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef SPDK_VHOST_INTERNAL_H 35 #define SPDK_VHOST_INTERNAL_H 36 37 #include "spdk/stdinc.h" 38 39 #include <rte_vhost.h> 40 41 #include "spdk_internal/log.h" 42 #include "spdk/event.h" 43 #include "spdk/rpc.h" 44 #include "spdk/config.h" 45 46 #define SPDK_CACHE_LINE_SIZE RTE_CACHE_LINE_SIZE 47 48 #ifndef VHOST_USER_F_PROTOCOL_FEATURES 49 #define VHOST_USER_F_PROTOCOL_FEATURES 30 50 #endif 51 52 #ifndef VIRTIO_F_VERSION_1 53 #define VIRTIO_F_VERSION_1 32 54 #endif 55 56 #ifndef VIRTIO_BLK_F_MQ 57 #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ 58 #endif 59 60 #ifndef VIRTIO_BLK_F_CONFIG_WCE 61 #define VIRTIO_BLK_F_CONFIG_WCE 11 62 #endif 63 64 #define SPDK_VHOST_MAX_VQUEUES 256 65 #define SPDK_VHOST_MAX_VQ_SIZE 1024 66 67 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8 68 69 #define SPDK_VHOST_IOVS_MAX 129 70 71 /* 72 * Rate at which stats are checked for interrupt coalescing. 73 */ 74 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10 75 /* 76 * Default threshold at which interrupts start to be coalesced. 77 */ 78 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000 79 80 /* 81 * Currently coalescing is not used by default. 82 * Setting this to value > 0 here or by RPC will enable coalescing. 83 */ 84 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0 85 86 87 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \ 88 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ 89 (1ULL << VIRTIO_F_VERSION_1) | \ 90 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ 91 (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 92 (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) 93 94 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 95 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY)) 96 97 struct vhost_poll_group; 98 99 struct spdk_vhost_virtqueue { 100 struct rte_vhost_vring vring; 101 uint16_t last_avail_idx; 102 uint16_t last_used_idx; 103 104 void *tasks; 105 106 /* Request count from last stats check */ 107 uint32_t req_cnt; 108 109 /* Request count from last event */ 110 uint16_t used_req_cnt; 111 112 /* How long interrupt is delayed */ 113 uint32_t irq_delay_time; 114 115 /* Next time when we need to send event */ 116 uint64_t next_event_time; 117 118 /* Associated vhost_virtqueue in the virtio device's virtqueue list */ 119 uint32_t vring_idx; 120 } __attribute((aligned(SPDK_CACHE_LINE_SIZE))); 121 122 struct spdk_vhost_session { 123 struct spdk_vhost_dev *vdev; 124 125 /* rte_vhost connection ID. */ 126 int vid; 127 128 /* Unique session ID. */ 129 uint64_t id; 130 /* Unique session name. */ 131 char *name; 132 133 struct vhost_poll_group *poll_group; 134 135 bool initialized; 136 bool started; 137 bool needs_restart; 138 bool forced_polling; 139 140 struct rte_vhost_memory *mem; 141 142 int task_cnt; 143 144 uint16_t max_queues; 145 146 uint64_t negotiated_features; 147 148 /* Local copy of device coalescing settings. */ 149 uint32_t coalescing_delay_time_base; 150 uint32_t coalescing_io_rate_threshold; 151 152 /* Next time when stats for event coalescing will be checked. */ 153 uint64_t next_stats_check_time; 154 155 /* Interval used for event coalescing checking. */ 156 uint64_t stats_check_interval; 157 158 struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES]; 159 160 TAILQ_ENTRY(spdk_vhost_session) tailq; 161 }; 162 163 struct spdk_vhost_dev { 164 char *name; 165 char *path; 166 167 struct spdk_cpuset *cpumask; 168 bool registered; 169 170 const struct spdk_vhost_dev_backend *backend; 171 172 /* Saved orginal values used to setup coalescing to avoid integer 173 * rounding issues during save/load config. 174 */ 175 uint32_t coalescing_delay_us; 176 uint32_t coalescing_iops_threshold; 177 178 /* Current connections to the device */ 179 TAILQ_HEAD(, spdk_vhost_session) vsessions; 180 181 /* Increment-only session counter */ 182 uint64_t vsessions_num; 183 184 /* Number of started and actively polled sessions */ 185 uint32_t active_session_num; 186 187 /* Number of pending asynchronous operations */ 188 uint32_t pending_async_op_num; 189 190 TAILQ_ENTRY(spdk_vhost_dev) tailq; 191 }; 192 193 /** 194 * Synchronized vhost session event used for backend callbacks. 195 * 196 * \param vdev vhost device. If the device has been deleted 197 * in the meantime, this function will be called one last 198 * time with vdev == NULL. 199 * \param vsession vhost session. If all sessions have been 200 * iterated through, this function will be called one last 201 * time with vsession == NULL. 202 * \param arg user-provided parameter. 203 * 204 * \return negative values will break the foreach call, meaning 205 * the function won't be called again. Return codes zero and 206 * positive don't have any effect. 207 */ 208 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev, 209 struct spdk_vhost_session *vsession, 210 void *arg); 211 212 struct spdk_vhost_dev_backend { 213 uint64_t virtio_features; 214 uint64_t disabled_features; 215 216 /** 217 * Size of additional per-session context data 218 * allocated whenever a new client connects. 219 */ 220 size_t session_ctx_size; 221 222 int (*start_session)(struct spdk_vhost_session *vsession); 223 int (*stop_session)(struct spdk_vhost_session *vsession); 224 225 int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len); 226 int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config, 227 uint32_t offset, uint32_t size, uint32_t flags); 228 229 void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 230 void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 231 int (*remove_device)(struct spdk_vhost_dev *vdev); 232 }; 233 234 void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len); 235 236 uint16_t spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs, 237 uint16_t reqs_len); 238 239 /** 240 * Get a virtio descriptor at given index in given virtqueue. 241 * The descriptor will provide access to the entire descriptor 242 * chain. The subsequent descriptors are accesible via 243 * \c spdk_vhost_vring_desc_get_next. 244 * \param vsession vhost session 245 * \param vq virtqueue 246 * \param req_idx descriptor index 247 * \param desc pointer to be set to the descriptor 248 * \param desc_table descriptor table to be used with 249 * \c spdk_vhost_vring_desc_get_next. This might be either 250 * default virtqueue descriptor table or per-chain indirect 251 * table. 252 * \param desc_table_size size of the *desc_table* 253 * \return 0 on success, -1 if given index is invalid. 254 * If -1 is returned, the content of params is undefined. 255 */ 256 int spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq, 257 uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, 258 uint32_t *desc_table_size); 259 260 /** 261 * Send IRQ/call client (if pending) for \c vq. 262 * \param vsession vhost session 263 * \param vq virtqueue 264 * \return 265 * 0 - if no interrupt was signalled 266 * 1 - if interrupt was signalled 267 */ 268 int spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq); 269 270 271 /** 272 * Send IRQs for all queues that need to be signaled. 273 * \param vsession vhost session 274 * \param vq virtqueue 275 */ 276 void spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession); 277 278 void spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession, 279 struct spdk_vhost_virtqueue *vq, 280 uint16_t id, uint32_t len); 281 282 /** 283 * Get subsequent descriptor from given table. 284 * \param desc current descriptor, will be set to the 285 * next descriptor (NULL in case this is the last 286 * descriptor in the chain or the next desc is invalid) 287 * \param desc_table descriptor table 288 * \param desc_table_size size of the *desc_table* 289 * \return 0 on success, -1 if given index is invalid 290 * The *desc* param will be set regardless of the 291 * return value. 292 */ 293 int spdk_vhost_vring_desc_get_next(struct vring_desc **desc, 294 struct vring_desc *desc_table, uint32_t desc_table_size); 295 bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc); 296 297 int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 298 uint16_t *iov_index, const struct vring_desc *desc); 299 300 static inline bool __attribute__((always_inline)) 301 spdk_vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id) 302 { 303 return vsession->negotiated_features & (1ULL << feature_id); 304 } 305 306 int spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, 307 const struct spdk_vhost_dev_backend *backend); 308 int spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev); 309 310 int spdk_vhost_scsi_controller_construct(void); 311 int spdk_vhost_blk_controller_construct(void); 312 void spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 313 314 /* 315 * Call function for each active session on the provided 316 * vhost device. The function will be called one-by-one 317 * on each session's thread. 318 * 319 * \param vdev vhost device 320 * \param fn function to call 321 * \param arg additional argument to \c fn 322 */ 323 void spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *dev, 324 spdk_vhost_session_fn fn, void *arg); 325 326 /** 327 * Call a function on the provided lcore and block until either 328 * spdk_vhost_session_start_done() or spdk_vhost_session_stop_done() 329 * is called. 330 * 331 * This must be called under the global vhost mutex, which this function 332 * will unlock for the time it's waiting. It's meant to be called only 333 * from start/stop session callbacks. 334 * 335 * \param pg designated session's poll group 336 * \param vsession vhost session 337 * \param cb_fn the function to call. The void *arg parameter in cb_fn 338 * is always NULL. 339 * \param timeout_sec timeout in seconds. This function will still 340 * block after the timeout expires, but will print the provided errmsg. 341 * \param errmsg error message to print once the timeout expires 342 * \return return the code passed to spdk_vhost_session_event_done(). 343 */ 344 int spdk_vhost_session_send_event(struct vhost_poll_group *pg, 345 struct spdk_vhost_session *vsession, 346 spdk_vhost_session_fn cb_fn, unsigned timeout_sec, 347 const char *errmsg); 348 349 /** 350 * Finish a blocking spdk_vhost_session_send_event() call and finally 351 * start the session. This must be called on the target lcore, which 352 * will now receive all session-related messages (e.g. from 353 * spdk_vhost_dev_foreach_session()). 354 * 355 * Must be called under the global vhost lock. 356 * 357 * \param vsession vhost session 358 * \param response return code 359 */ 360 void spdk_vhost_session_start_done(struct spdk_vhost_session *vsession, int response); 361 362 /** 363 * Finish a blocking spdk_vhost_session_send_event() call and finally 364 * stop the session. This must be called on the session's lcore which 365 * used to receive all session-related messages (e.g. from 366 * spdk_vhost_dev_foreach_session()). After this call, the session- 367 * related messages will be once again processed by any arbitrary thread. 368 * 369 * Must be called under the global vhost lock. 370 * 371 * Must be called under the global vhost mutex. 372 * 373 * \param vsession vhost session 374 * \param response return code 375 */ 376 void spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession, int response); 377 378 struct spdk_vhost_session *spdk_vhost_session_find_by_vid(int vid); 379 void spdk_vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession); 380 void spdk_vhost_dev_install_rte_compat_hooks(struct spdk_vhost_dev *vdev); 381 382 struct vhost_poll_group *spdk_vhost_get_poll_group(struct spdk_cpuset *cpumask); 383 void spdk_vhost_put_poll_group(struct vhost_poll_group *pg); 384 385 int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev); 386 387 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 388 int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf); 389 int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd); 390 int spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size); 391 int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap); 392 int spdk_vhost_nvme_controller_construct(void); 393 int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues); 394 int spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev); 395 int spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, 396 const char *bdev_name); 397 #endif 398 399 #endif /* SPDK_VHOST_INTERNAL_H */ 400