1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef SPDK_VHOST_INTERNAL_H 35 #define SPDK_VHOST_INTERNAL_H 36 #include <linux/virtio_config.h> 37 38 #include "spdk/stdinc.h" 39 40 #include <rte_vhost.h> 41 42 #include "spdk_internal/vhost_user.h" 43 #include "spdk_internal/log.h" 44 #include "spdk/util.h" 45 #include "spdk/rpc.h" 46 #include "spdk/config.h" 47 48 #define SPDK_VHOST_MAX_VQUEUES 256 49 #define SPDK_VHOST_MAX_VQ_SIZE 1024 50 51 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8 52 53 #define SPDK_VHOST_IOVS_MAX 129 54 55 #define SPDK_VHOST_VQ_MAX_SUBMISSIONS 32 56 57 /* 58 * Rate at which stats are checked for interrupt coalescing. 59 */ 60 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10 61 /* 62 * Default threshold at which interrupts start to be coalesced. 63 */ 64 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000 65 66 /* 67 * Currently coalescing is not used by default. 68 * Setting this to value > 0 here or by RPC will enable coalescing. 69 */ 70 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0 71 72 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \ 73 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ 74 (1ULL << VIRTIO_F_VERSION_1) | \ 75 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ 76 (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 77 (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ 78 (1ULL << VIRTIO_F_RING_PACKED)) 79 80 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 81 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY)) 82 83 #define VRING_DESC_F_AVAIL (1ULL << VRING_PACKED_DESC_F_AVAIL) 84 #define VRING_DESC_F_USED (1ULL << VRING_PACKED_DESC_F_USED) 85 #define VRING_DESC_F_AVAIL_USED (VRING_DESC_F_AVAIL | VRING_DESC_F_USED) 86 87 typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc; 88 typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info; 89 90 struct spdk_vhost_virtqueue { 91 struct rte_vhost_vring vring; 92 struct rte_vhost_ring_inflight vring_inflight; 93 uint16_t last_avail_idx; 94 uint16_t last_used_idx; 95 96 struct { 97 /* To mark a descriptor as available in packed ring 98 * Equal to avail_wrap_counter in spec. 99 */ 100 uint8_t avail_phase : 1; 101 /* To mark a descriptor as used in packed ring 102 * Equal to used_wrap_counter in spec. 103 */ 104 uint8_t used_phase : 1; 105 uint8_t padding : 5; 106 bool packed_ring : 1; 107 } packed; 108 109 void *tasks; 110 111 /* Request count from last stats check */ 112 uint32_t req_cnt; 113 114 /* Request count from last event */ 115 uint16_t used_req_cnt; 116 117 /* How long interrupt is delayed */ 118 uint32_t irq_delay_time; 119 120 /* Next time when we need to send event */ 121 uint64_t next_event_time; 122 123 /* Associated vhost_virtqueue in the virtio device's virtqueue list */ 124 uint32_t vring_idx; 125 } __attribute((aligned(SPDK_CACHE_LINE_SIZE))); 126 127 struct spdk_vhost_session { 128 struct spdk_vhost_dev *vdev; 129 130 /* rte_vhost connection ID. */ 131 int vid; 132 133 /* Unique session ID. */ 134 uint64_t id; 135 /* Unique session name. */ 136 char *name; 137 138 bool initialized; 139 bool started; 140 bool needs_restart; 141 bool forced_polling; 142 143 struct rte_vhost_memory *mem; 144 145 int task_cnt; 146 147 uint16_t max_queues; 148 149 uint64_t negotiated_features; 150 151 /* Local copy of device coalescing settings. */ 152 uint32_t coalescing_delay_time_base; 153 uint32_t coalescing_io_rate_threshold; 154 155 /* Next time when stats for event coalescing will be checked. */ 156 uint64_t next_stats_check_time; 157 158 /* Interval used for event coalescing checking. */ 159 uint64_t stats_check_interval; 160 161 struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES]; 162 163 TAILQ_ENTRY(spdk_vhost_session) tailq; 164 }; 165 166 struct spdk_vhost_dev { 167 char *name; 168 char *path; 169 170 struct spdk_thread *thread; 171 bool registered; 172 173 uint64_t virtio_features; 174 uint64_t disabled_features; 175 uint64_t protocol_features; 176 177 const struct spdk_vhost_dev_backend *backend; 178 179 /* Saved orginal values used to setup coalescing to avoid integer 180 * rounding issues during save/load config. 181 */ 182 uint32_t coalescing_delay_us; 183 uint32_t coalescing_iops_threshold; 184 185 /* Current connections to the device */ 186 TAILQ_HEAD(, spdk_vhost_session) vsessions; 187 188 /* Increment-only session counter */ 189 uint64_t vsessions_num; 190 191 /* Number of started and actively polled sessions */ 192 uint32_t active_session_num; 193 194 /* Number of pending asynchronous operations */ 195 uint32_t pending_async_op_num; 196 197 TAILQ_ENTRY(spdk_vhost_dev) tailq; 198 }; 199 200 /** 201 * \param vdev vhost device. 202 * \param vsession vhost session. 203 * \param arg user-provided parameter. 204 * 205 * \return negative values will break the foreach call, meaning 206 * the function won't be called again. Return codes zero and 207 * positive don't have any effect. 208 */ 209 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev, 210 struct spdk_vhost_session *vsession, 211 void *arg); 212 213 /** 214 * \param vdev vhost device. 215 * \param arg user-provided parameter. 216 */ 217 typedef void (*spdk_vhost_dev_fn)(struct spdk_vhost_dev *vdev, void *arg); 218 219 struct spdk_vhost_dev_backend { 220 /** 221 * Size of additional per-session context data 222 * allocated whenever a new client connects. 223 */ 224 size_t session_ctx_size; 225 226 int (*start_session)(struct spdk_vhost_session *vsession); 227 int (*stop_session)(struct spdk_vhost_session *vsession); 228 229 int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len); 230 int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config, 231 uint32_t offset, uint32_t size, uint32_t flags); 232 233 void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 234 void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 235 int (*remove_device)(struct spdk_vhost_dev *vdev); 236 }; 237 238 void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len); 239 240 uint16_t vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs, 241 uint16_t reqs_len); 242 243 /** 244 * Get a virtio split descriptor at given index in given virtqueue. 245 * The descriptor will provide access to the entire descriptor 246 * chain. The subsequent descriptors are accesible via 247 * \c spdk_vhost_vring_desc_get_next. 248 * \param vsession vhost session 249 * \param vq virtqueue 250 * \param req_idx descriptor index 251 * \param desc pointer to be set to the descriptor 252 * \param desc_table descriptor table to be used with 253 * \c spdk_vhost_vring_desc_get_next. This might be either 254 * default virtqueue descriptor table or per-chain indirect 255 * table. 256 * \param desc_table_size size of the *desc_table* 257 * \return 0 on success, -1 if given index is invalid. 258 * If -1 is returned, the content of params is undefined. 259 */ 260 int vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq, 261 uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, 262 uint32_t *desc_table_size); 263 264 /** 265 * Get a virtio packed descriptor at given index in given virtqueue. 266 * The descriptor will provide access to the entire descriptor 267 * chain. The subsequent descriptors are accesible via 268 * \c vhost_vring_packed_desc_get_next. 269 * \param vsession vhost session 270 * \param vq virtqueue 271 * \param req_idx descriptor index 272 * \param desc pointer to be set to the descriptor 273 * \param desc_table descriptor table to be used with 274 * \c spdk_vhost_vring_desc_get_next. This might be either 275 * \c NULL or per-chain indirect table. 276 * \param desc_table_size size of the *desc_table* 277 * \return 0 on success, -1 if given index is invalid. 278 * If -1 is returned, the content of params is undefined. 279 */ 280 int vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession, 281 struct spdk_vhost_virtqueue *virtqueue, 282 uint16_t req_idx, struct vring_packed_desc **desc, 283 struct vring_packed_desc **desc_table, uint32_t *desc_table_size); 284 285 /** 286 * Send IRQ/call client (if pending) for \c vq. 287 * \param vsession vhost session 288 * \param vq virtqueue 289 * \return 290 * 0 - if no interrupt was signalled 291 * 1 - if interrupt was signalled 292 */ 293 int vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq); 294 295 296 /** 297 * Send IRQs for all queues that need to be signaled. 298 * \param vsession vhost session 299 * \param vq virtqueue 300 */ 301 void vhost_session_used_signal(struct spdk_vhost_session *vsession); 302 303 void vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession, 304 struct spdk_vhost_virtqueue *vq, 305 uint16_t id, uint32_t len); 306 307 /** 308 * Enqueue the entry to the used ring when device complete the request. 309 * \param vsession vhost session 310 * \param vq virtqueue 311 * \req_idx descriptor index. It's the first index of this descriptor chain. 312 * \num_descs descriptor count. It's the count of the number of buffers in the chain. 313 * \buffer_id descriptor buffer ID. 314 * \length device write length. Specify the length of the buffer that has been initialized 315 * (written to) by the device 316 */ 317 void vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession, 318 struct spdk_vhost_virtqueue *virtqueue, 319 uint16_t num_descs, uint16_t buffer_id, 320 uint32_t length); 321 322 /** 323 * Get subsequent descriptor from given table. 324 * \param desc current descriptor, will be set to the 325 * next descriptor (NULL in case this is the last 326 * descriptor in the chain or the next desc is invalid) 327 * \param desc_table descriptor table 328 * \param desc_table_size size of the *desc_table* 329 * \return 0 on success, -1 if given index is invalid 330 * The *desc* param will be set regardless of the 331 * return value. 332 */ 333 int vhost_vring_desc_get_next(struct vring_desc **desc, 334 struct vring_desc *desc_table, uint32_t desc_table_size); 335 static inline bool 336 vhost_vring_desc_is_wr(struct vring_desc *cur_desc) 337 { 338 return !!(cur_desc->flags & VRING_DESC_F_WRITE); 339 } 340 341 int vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 342 uint16_t *iov_index, const struct vring_desc *desc); 343 344 bool vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue); 345 346 /** 347 * Get subsequent descriptor from vq or desc table. 348 * \param desc current descriptor, will be set to the 349 * next descriptor (NULL in case this is the last 350 * descriptor in the chain or the next desc is invalid) 351 * \req_idx index of current desc, will be set to the next 352 * index. If desc_table != NULL the req_idx is the the vring index 353 * or the req_idx is the desc_table index. 354 * \param desc_table descriptor table 355 * \param desc_table_size size of the *desc_table* 356 * \return 0 on success, -1 if given index is invalid 357 * The *desc* param will be set regardless of the 358 * return value. 359 */ 360 int vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx, 361 struct spdk_vhost_virtqueue *vq, 362 struct vring_packed_desc *desc_table, 363 uint32_t desc_table_size); 364 365 bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc); 366 367 int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 368 uint16_t *iov_index, const struct vring_packed_desc *desc); 369 370 uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx, 371 uint16_t *num_descs); 372 373 static inline bool __attribute__((always_inline)) 374 vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id) 375 { 376 return vsession->negotiated_features & (1ULL << feature_id); 377 } 378 379 int vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, 380 const struct spdk_vhost_dev_backend *backend); 381 int vhost_dev_unregister(struct spdk_vhost_dev *vdev); 382 383 int vhost_scsi_controller_construct(void); 384 int vhost_blk_controller_construct(void); 385 void vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 386 387 /* 388 * Vhost callbacks for vhost_device_ops interface 389 */ 390 391 int vhost_new_connection_cb(int vid, const char *ifname); 392 int vhost_start_device_cb(int vid); 393 int vhost_stop_device_cb(int vid); 394 int vhost_destroy_connection_cb(int vid); 395 396 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 397 int vhost_get_config_cb(int vid, uint8_t *config, uint32_t len); 398 int vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, 399 uint32_t size, uint32_t flags); 400 #endif 401 402 /* 403 * Memory registration functions used in start/stop device callbacks 404 */ 405 void vhost_session_mem_register(struct rte_vhost_memory *mem); 406 void vhost_session_mem_unregister(struct rte_vhost_memory *mem); 407 408 /* 409 * Call a function for each session of the provided vhost device. 410 * The function will be called one-by-one on each session's thread. 411 * 412 * \param vdev vhost device 413 * \param fn function to call on each session's thread 414 * \param cpl_fn function to be called at the end of the iteration on 415 * the vhost management thread. 416 * Optional, can be NULL. 417 * \param arg additional argument to the both callbacks 418 */ 419 void vhost_dev_foreach_session(struct spdk_vhost_dev *dev, 420 spdk_vhost_session_fn fn, 421 spdk_vhost_dev_fn cpl_fn, 422 void *arg); 423 424 /** 425 * Call a function on the provided lcore and block until either 426 * spdk_vhost_session_start_done() or spdk_vhost_session_stop_done() 427 * is called. 428 * 429 * This must be called under the global vhost mutex, which this function 430 * will unlock for the time it's waiting. It's meant to be called only 431 * from start/stop session callbacks. 432 * 433 * \param vsession vhost session 434 * \param cb_fn the function to call. The void *arg parameter in cb_fn 435 * is always NULL. 436 * \param timeout_sec timeout in seconds. This function will still 437 * block after the timeout expires, but will print the provided errmsg. 438 * \param errmsg error message to print once the timeout expires 439 * \return return the code passed to spdk_vhost_session_event_done(). 440 */ 441 int vhost_session_send_event(struct spdk_vhost_session *vsession, 442 spdk_vhost_session_fn cb_fn, unsigned timeout_sec, 443 const char *errmsg); 444 445 /** 446 * Finish a blocking spdk_vhost_session_send_event() call and finally 447 * start the session. This must be called on the target lcore, which 448 * will now receive all session-related messages (e.g. from 449 * spdk_vhost_dev_foreach_session()). 450 * 451 * Must be called under the global vhost lock. 452 * 453 * \param vsession vhost session 454 * \param response return code 455 */ 456 void vhost_session_start_done(struct spdk_vhost_session *vsession, int response); 457 458 /** 459 * Finish a blocking spdk_vhost_session_send_event() call and finally 460 * stop the session. This must be called on the session's lcore which 461 * used to receive all session-related messages (e.g. from 462 * spdk_vhost_dev_foreach_session()). After this call, the session- 463 * related messages will be once again processed by any arbitrary thread. 464 * 465 * Must be called under the global vhost lock. 466 * 467 * Must be called under the global vhost mutex. 468 * 469 * \param vsession vhost session 470 * \param response return code 471 */ 472 void vhost_session_stop_done(struct spdk_vhost_session *vsession, int response); 473 474 struct spdk_vhost_session *vhost_session_find_by_vid(int vid); 475 void vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession); 476 int vhost_register_unix_socket(const char *path, const char *ctrl_name, 477 uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features); 478 int vhost_driver_unregister(const char *path); 479 int vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); 480 int vhost_get_negotiated_features(int vid, uint64_t *negotiated_features); 481 482 int remove_vhost_controller(struct spdk_vhost_dev *vdev); 483 484 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 485 int vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf); 486 int vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd); 487 int vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size); 488 int vhost_nvme_get_cap(int vid, uint64_t *cap); 489 int vhost_nvme_controller_construct(void); 490 int vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues); 491 int vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev); 492 int vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, 493 const char *bdev_name); 494 #endif 495 496 #endif /* SPDK_VHOST_INTERNAL_H */ 497