1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef SPDK_VHOST_INTERNAL_H 35 #define SPDK_VHOST_INTERNAL_H 36 #include <linux/virtio_config.h> 37 38 #include "spdk/stdinc.h" 39 40 #include <rte_vhost.h> 41 42 #include "spdk_internal/vhost_user.h" 43 #include "spdk/bdev.h" 44 #include "spdk/log.h" 45 #include "spdk/util.h" 46 #include "spdk/rpc.h" 47 #include "spdk/config.h" 48 49 #define SPDK_VHOST_MAX_VQUEUES 256 50 #define SPDK_VHOST_MAX_VQ_SIZE 1024 51 52 #define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8 53 54 #define SPDK_VHOST_IOVS_MAX 129 55 56 #define SPDK_VHOST_VQ_MAX_SUBMISSIONS 32 57 58 /* 59 * Rate at which stats are checked for interrupt coalescing. 60 */ 61 #define SPDK_VHOST_STATS_CHECK_INTERVAL_MS 10 62 /* 63 * Default threshold at which interrupts start to be coalesced. 64 */ 65 #define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000 66 67 /* 68 * Currently coalescing is not used by default. 69 * Setting this to value > 0 here or by RPC will enable coalescing. 70 */ 71 #define SPDK_VHOST_COALESCING_DELAY_BASE_US 0 72 73 #define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \ 74 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ 75 (1ULL << VIRTIO_F_VERSION_1) | \ 76 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ 77 (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 78 (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ 79 (1ULL << VIRTIO_F_RING_PACKED) | \ 80 (1ULL << VIRTIO_F_ANY_LAYOUT)) 81 82 #define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \ 83 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY)) 84 85 #define VRING_DESC_F_AVAIL (1ULL << VRING_PACKED_DESC_F_AVAIL) 86 #define VRING_DESC_F_USED (1ULL << VRING_PACKED_DESC_F_USED) 87 #define VRING_DESC_F_AVAIL_USED (VRING_DESC_F_AVAIL | VRING_DESC_F_USED) 88 89 typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc; 90 typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info; 91 typedef struct rte_vhost_inflight_desc_packed spdk_vhost_inflight_desc; 92 93 struct spdk_vhost_virtqueue { 94 struct rte_vhost_vring vring; 95 struct rte_vhost_ring_inflight vring_inflight; 96 uint16_t last_avail_idx; 97 uint16_t last_used_idx; 98 99 struct { 100 /* To mark a descriptor as available in packed ring 101 * Equal to avail_wrap_counter in spec. 102 */ 103 uint8_t avail_phase : 1; 104 /* To mark a descriptor as used in packed ring 105 * Equal to used_wrap_counter in spec. 106 */ 107 uint8_t used_phase : 1; 108 uint8_t padding : 5; 109 bool packed_ring : 1; 110 } packed; 111 112 void *tasks; 113 114 /* Request count from last stats check */ 115 uint32_t req_cnt; 116 117 /* Request count from last event */ 118 uint16_t used_req_cnt; 119 120 /* How long interrupt is delayed */ 121 uint32_t irq_delay_time; 122 123 /* Next time when we need to send event */ 124 uint64_t next_event_time; 125 126 /* Associated vhost_virtqueue in the virtio device's virtqueue list */ 127 uint32_t vring_idx; 128 129 struct spdk_vhost_session *vsession; 130 131 struct spdk_interrupt *intr; 132 } __attribute((aligned(SPDK_CACHE_LINE_SIZE))); 133 134 struct spdk_vhost_session { 135 struct spdk_vhost_dev *vdev; 136 137 /* rte_vhost connection ID. */ 138 int vid; 139 140 /* Unique session ID. */ 141 uint64_t id; 142 /* Unique session name. */ 143 char *name; 144 145 bool initialized; 146 bool started; 147 bool needs_restart; 148 bool forced_polling; 149 bool interrupt_mode; 150 151 struct rte_vhost_memory *mem; 152 153 int task_cnt; 154 155 uint16_t max_queues; 156 157 uint64_t negotiated_features; 158 159 /* Local copy of device coalescing settings. */ 160 uint32_t coalescing_delay_time_base; 161 uint32_t coalescing_io_rate_threshold; 162 163 /* Next time when stats for event coalescing will be checked. */ 164 uint64_t next_stats_check_time; 165 166 /* Interval used for event coalescing checking. */ 167 uint64_t stats_check_interval; 168 169 /* Session's stop poller will only try limited times to destroy the session. */ 170 uint32_t stop_retry_count; 171 172 struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES]; 173 174 TAILQ_ENTRY(spdk_vhost_session) tailq; 175 }; 176 177 struct spdk_vhost_user_dev { 178 struct spdk_vhost_dev *vdev; 179 180 const struct spdk_vhost_user_dev_backend *user_backend; 181 182 /* Saved original values used to setup coalescing to avoid integer 183 * rounding issues during save/load config. 184 */ 185 uint32_t coalescing_delay_us; 186 uint32_t coalescing_iops_threshold; 187 188 /* Current connections to the device */ 189 TAILQ_HEAD(, spdk_vhost_session) vsessions; 190 191 /* Increment-only session counter */ 192 uint64_t vsessions_num; 193 194 /* Number of started and actively polled sessions */ 195 uint32_t active_session_num; 196 197 /* Number of pending asynchronous operations */ 198 uint32_t pending_async_op_num; 199 }; 200 201 struct spdk_vhost_dev { 202 char *name; 203 char *path; 204 205 struct spdk_thread *thread; 206 bool registered; 207 208 uint64_t virtio_features; 209 uint64_t disabled_features; 210 uint64_t protocol_features; 211 bool packed_ring_recovery; 212 213 const struct spdk_vhost_dev_backend *backend; 214 215 /* Context passed from transport */ 216 void *ctxt; 217 218 TAILQ_ENTRY(spdk_vhost_dev) tailq; 219 }; 220 221 /** 222 * \param vdev vhost device. 223 * \param vsession vhost session. 224 * \param arg user-provided parameter. 225 * 226 * \return negative values will break the foreach call, meaning 227 * the function won't be called again. Return codes zero and 228 * positive don't have any effect. 229 */ 230 typedef int (*spdk_vhost_session_fn)(struct spdk_vhost_dev *vdev, 231 struct spdk_vhost_session *vsession, 232 void *arg); 233 234 /** 235 * \param vdev vhost device. 236 * \param arg user-provided parameter. 237 */ 238 typedef void (*spdk_vhost_dev_fn)(struct spdk_vhost_dev *vdev, void *arg); 239 240 struct spdk_vhost_user_dev_backend { 241 /** 242 * Size of additional per-session context data 243 * allocated whenever a new client connects. 244 */ 245 size_t session_ctx_size; 246 247 int (*start_session)(struct spdk_vhost_session *vsession); 248 int (*stop_session)(struct spdk_vhost_session *vsession); 249 }; 250 251 struct spdk_vhost_dev_backend { 252 int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len); 253 int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config, 254 uint32_t offset, uint32_t size, uint32_t flags); 255 256 void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 257 void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 258 int (*remove_device)(struct spdk_vhost_dev *vdev); 259 }; 260 261 void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len); 262 263 uint16_t vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs, 264 uint16_t reqs_len); 265 266 /** 267 * Get a virtio split descriptor at given index in given virtqueue. 268 * The descriptor will provide access to the entire descriptor 269 * chain. The subsequent descriptors are accessible via 270 * \c spdk_vhost_vring_desc_get_next. 271 * \param vsession vhost session 272 * \param vq virtqueue 273 * \param req_idx descriptor index 274 * \param desc pointer to be set to the descriptor 275 * \param desc_table descriptor table to be used with 276 * \c spdk_vhost_vring_desc_get_next. This might be either 277 * default virtqueue descriptor table or per-chain indirect 278 * table. 279 * \param desc_table_size size of the *desc_table* 280 * \return 0 on success, -1 if given index is invalid. 281 * If -1 is returned, the content of params is undefined. 282 */ 283 int vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq, 284 uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, 285 uint32_t *desc_table_size); 286 287 /** 288 * Get a virtio packed descriptor at given index in given virtqueue. 289 * The descriptor will provide access to the entire descriptor 290 * chain. The subsequent descriptors are accessible via 291 * \c vhost_vring_packed_desc_get_next. 292 * \param vsession vhost session 293 * \param vq virtqueue 294 * \param req_idx descriptor index 295 * \param desc pointer to be set to the descriptor 296 * \param desc_table descriptor table to be used with 297 * \c spdk_vhost_vring_desc_get_next. This might be either 298 * \c NULL or per-chain indirect table. 299 * \param desc_table_size size of the *desc_table* 300 * \return 0 on success, -1 if given index is invalid. 301 * If -1 is returned, the content of params is undefined. 302 */ 303 int vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession, 304 struct spdk_vhost_virtqueue *virtqueue, 305 uint16_t req_idx, struct vring_packed_desc **desc, 306 struct vring_packed_desc **desc_table, uint32_t *desc_table_size); 307 308 int vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession, 309 spdk_vhost_inflight_desc *desc_array, 310 uint16_t req_idx, spdk_vhost_inflight_desc **desc, 311 struct vring_packed_desc **desc_table, uint32_t *desc_table_size); 312 313 /** 314 * Send IRQ/call client (if pending) for \c vq. 315 * \param vsession vhost session 316 * \param vq virtqueue 317 * \return 318 * 0 - if no interrupt was signalled 319 * 1 - if interrupt was signalled 320 */ 321 int vhost_vq_used_signal(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq); 322 323 324 /** 325 * Send IRQs for all queues that need to be signaled. 326 * \param vsession vhost session 327 * \param vq virtqueue 328 */ 329 void vhost_session_used_signal(struct spdk_vhost_session *vsession); 330 331 /** 332 * Send IRQs for the queue that need to be signaled. 333 * \param vq virtqueue 334 */ 335 void vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue); 336 337 void vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession, 338 struct spdk_vhost_virtqueue *vq, 339 uint16_t id, uint32_t len); 340 341 /** 342 * Enqueue the entry to the used ring when device complete the request. 343 * \param vsession vhost session 344 * \param vq virtqueue 345 * \req_idx descriptor index. It's the first index of this descriptor chain. 346 * \num_descs descriptor count. It's the count of the number of buffers in the chain. 347 * \buffer_id descriptor buffer ID. 348 * \length device write length. Specify the length of the buffer that has been initialized 349 * (written to) by the device 350 * \inflight_head the head idx of this IO inflight desc chain. 351 */ 352 void vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession, 353 struct spdk_vhost_virtqueue *virtqueue, 354 uint16_t num_descs, uint16_t buffer_id, 355 uint32_t length, uint16_t inflight_head); 356 357 /** 358 * Get subsequent descriptor from given table. 359 * \param desc current descriptor, will be set to the 360 * next descriptor (NULL in case this is the last 361 * descriptor in the chain or the next desc is invalid) 362 * \param desc_table descriptor table 363 * \param desc_table_size size of the *desc_table* 364 * \return 0 on success, -1 if given index is invalid 365 * The *desc* param will be set regardless of the 366 * return value. 367 */ 368 int vhost_vring_desc_get_next(struct vring_desc **desc, 369 struct vring_desc *desc_table, uint32_t desc_table_size); 370 static inline bool 371 vhost_vring_desc_is_wr(struct vring_desc *cur_desc) 372 { 373 return !!(cur_desc->flags & VRING_DESC_F_WRITE); 374 } 375 376 int vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 377 uint16_t *iov_index, const struct vring_desc *desc); 378 379 bool vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue); 380 381 /** 382 * Get subsequent descriptor from vq or desc table. 383 * \param desc current descriptor, will be set to the 384 * next descriptor (NULL in case this is the last 385 * descriptor in the chain or the next desc is invalid) 386 * \req_idx index of current desc, will be set to the next 387 * index. If desc_table != NULL the req_idx is the the vring index 388 * or the req_idx is the desc_table index. 389 * \param desc_table descriptor table 390 * \param desc_table_size size of the *desc_table* 391 * \return 0 on success, -1 if given index is invalid 392 * The *desc* param will be set regardless of the 393 * return value. 394 */ 395 int vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx, 396 struct spdk_vhost_virtqueue *vq, 397 struct vring_packed_desc *desc_table, 398 uint32_t desc_table_size); 399 400 bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc); 401 402 int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 403 uint16_t *iov_index, const struct vring_packed_desc *desc); 404 405 bool vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc); 406 407 int vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 408 uint16_t *iov_index, const spdk_vhost_inflight_desc *desc); 409 410 uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx, 411 uint16_t *num_descs); 412 413 static inline bool __attribute__((always_inline)) 414 vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id) 415 { 416 return vsession->negotiated_features & (1ULL << feature_id); 417 } 418 419 int vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, 420 const struct spdk_vhost_dev_backend *backend, 421 const struct spdk_vhost_user_dev_backend *user_backend); 422 int vhost_dev_unregister(struct spdk_vhost_dev *vdev); 423 424 void vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); 425 426 /* 427 * Set vhost session to run in interrupt or poll mode 428 */ 429 void vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession, 430 bool interrupt_mode); 431 432 /* 433 * Memory registration functions used in start/stop device callbacks 434 */ 435 void vhost_session_mem_register(struct rte_vhost_memory *mem); 436 void vhost_session_mem_unregister(struct rte_vhost_memory *mem); 437 438 /* 439 * Call a function for each session of the provided vhost device. 440 * The function will be called one-by-one on each session's thread. 441 * 442 * \param vdev vhost device 443 * \param fn function to call on each session's thread 444 * \param cpl_fn function to be called at the end of the iteration on 445 * the vhost management thread. 446 * Optional, can be NULL. 447 * \param arg additional argument to the both callbacks 448 */ 449 void vhost_user_dev_foreach_session(struct spdk_vhost_dev *dev, 450 spdk_vhost_session_fn fn, 451 spdk_vhost_dev_fn cpl_fn, 452 void *arg); 453 454 /** 455 * Call a function on the provided lcore and block until either 456 * vhost_user_session_start_done() or vhost_user_session_stop_done() 457 * is called. 458 * 459 * This must be called under the global vhost mutex, which this function 460 * will unlock for the time it's waiting. It's meant to be called only 461 * from start/stop session callbacks. 462 * 463 * \param vsession vhost session 464 * \param cb_fn the function to call. The void *arg parameter in cb_fn 465 * is always NULL. 466 * \param timeout_sec timeout in seconds. This function will still 467 * block after the timeout expires, but will print the provided errmsg. 468 * \param errmsg error message to print once the timeout expires 469 * \return return the code passed to spdk_vhost_session_event_done(). 470 */ 471 int vhost_user_session_send_event(struct spdk_vhost_session *vsession, 472 spdk_vhost_session_fn cb_fn, unsigned timeout_sec, 473 const char *errmsg); 474 475 /** 476 * Finish a blocking spdk_vhost_user_session_send_event() call and finally 477 * start the session. This must be called on the target lcore, which 478 * will now receive all session-related messages (e.g. from 479 * vhost_user_dev_foreach_session()). 480 * 481 * Must be called under the global vhost lock. 482 * 483 * \param vsession vhost session 484 * \param response return code 485 */ 486 void vhost_user_session_start_done(struct spdk_vhost_session *vsession, int response); 487 488 /** 489 * Finish a blocking spdk_vhost_user_session_send_event() call and finally 490 * stop the session. This must be called on the session's lcore which 491 * used to receive all session-related messages (e.g. from 492 * vhost_user_dev_foreach_session()). After this call, the session- 493 * related messages will be once again processed by any arbitrary thread. 494 * 495 * Must be called under the global vhost lock. 496 * 497 * Must be called under the global vhost mutex. 498 * 499 * \param vsession vhost session 500 * \param response return code 501 */ 502 void vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response); 503 504 struct spdk_vhost_session *vhost_session_find_by_vid(int vid); 505 void vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession); 506 int vhost_register_unix_socket(const char *path, const char *ctrl_name, 507 uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features); 508 int vhost_driver_unregister(const char *path); 509 int vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); 510 int vhost_get_negotiated_features(int vid, uint64_t *negotiated_features); 511 512 int remove_vhost_controller(struct spdk_vhost_dev *vdev); 513 514 struct spdk_io_channel *vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev); 515 void vhost_blk_put_io_channel(struct spdk_io_channel *ch); 516 517 /* Function calls from vhost.c to rte_vhost_user.c, 518 * shall removed once virtio transport abstraction is complete. */ 519 int vhost_user_session_set_coalescing(struct spdk_vhost_dev *dev, 520 struct spdk_vhost_session *vsession, void *ctx); 521 int vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us, 522 uint32_t iops_threshold); 523 int vhost_user_dev_register(struct spdk_vhost_dev *vdev, const char *name, 524 struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend); 525 int vhost_user_dev_unregister(struct spdk_vhost_dev *vdev); 526 int vhost_user_init(void); 527 typedef void (*vhost_fini_cb)(void *ctx); 528 void vhost_user_fini(vhost_fini_cb vhost_cb); 529 530 struct spdk_vhost_blk_task { 531 struct spdk_bdev_io *bdev_io; 532 533 volatile uint8_t *status; 534 535 /* for io wait */ 536 struct spdk_bdev_io_wait_entry bdev_io_wait; 537 struct spdk_io_channel *bdev_io_wait_ch; 538 struct spdk_vhost_dev *bdev_io_wait_vdev; 539 540 /** Number of bytes that were written. */ 541 uint32_t used_len; 542 uint16_t iovcnt; 543 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 544 545 /** Size of whole payload in bytes */ 546 uint32_t payload_size; 547 }; 548 549 #endif /* SPDK_VHOST_INTERNAL_H */ 550