/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_INTERNAL_H__
#define __NVME_INTERNAL_H__

#include "spdk/config.h"
#include "spdk/likely.h"
#include "spdk/stdinc.h"

#include "spdk/nvme.h"

#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif

#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/bit_array.h"
#include "spdk/mmio.h"
#include "spdk/pci_ids.h"
#include "spdk/util.h"
#include "spdk/nvme_intel.h"
#include "spdk/nvmf_spec.h"
#include "spdk/uuid.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/log.h"
#include "spdk_internal/memory.h"

extern pid_t g_spdk_nvme_pid;

/*
 * Some Intel devices support the vendor-unique read latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_READ_LATENCY 0x1

/*
 * Some Intel devices support the vendor-unique write latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_WRITE_LATENCY 0x2

/*
 * The controller needs a delay before it starts checking device
 * readiness, which is done by reading the NVME_CSTS_RDY bit.
 */
#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY 0x4

/*
 * The controller performs best when I/O is split on particular
 * LBA boundaries.
 */
#define NVME_INTEL_QUIRK_STRIPING 0x8

/*
 * The controller needs a delay after allocating an I/O queue pair
 * before it is ready to accept I/O commands.
 */
#define NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC 0x10

/*
 * Earlier NVMe devices do not indicate whether unmapped blocks
 * will read all zeroes or not. This define indicates that the
 * device does in fact read all zeroes after an unmap event.
 */
#define NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE 0x20

/*
 * The controller does not correctly handle Identify CNS values other than 0 or 1.
 */
#define NVME_QUIRK_IDENTIFY_CNS 0x40

/*
 * The controller supports the Open Channel command set if an additional
 * condition is met, e.g. the first byte of the vendor-specific area of the
 * namespace identify structure is set to 0x1.
 */
#define NVME_QUIRK_OCSSD 0x80

/*
 * The controller has an Intel vendor ID but does not support Intel vendor-specific
 * log pages. This is primarily for QEMU emulated SSDs, which report an Intel vendor
 * ID but do not support these log pages.
 */
#define NVME_INTEL_QUIRK_NO_LOG_PAGES 0x100

/*
 * The controller does not set SHST_COMPLETE in a reasonable amount of time. This
 * is primarily seen in virtual VMware NVMe SSDs. This quirk merely adds an additional
 * error message noting that on VMware NVMe SSDs the shutdown timeout may be expected.
 */
#define NVME_QUIRK_SHST_COMPLETE 0x200
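
/*
 * Illustrative sketch (not part of this header's API): quirks are stored as a
 * bitmask in spdk_nvme_ctrlr::quirks (declared below), so transport and command
 * path code typically tests them as shown here; the body of the branch is a
 * hypothetical placeholder.
 *
 *	if (ctrlr->quirks & NVME_INTEL_QUIRK_STRIPING) {
 *		// split I/O on the stripe boundary reported by the device
 *	}
 */
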
#define NVME_MAX_ASYNC_EVENTS (8)

#define NVME_MAX_ADMIN_TIMEOUT_IN_SECS (30)

/* Maximum log page size to fetch for AERs. */
#define NVME_MAX_AER_LOG_SIZE (4096)

/*
 * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec limit, but this
 * define specifies the maximum number of queues this driver will actually
 * try to configure, if available.
 */
#define DEFAULT_MAX_IO_QUEUES (1024)
#define DEFAULT_IO_QUEUE_SIZE (256)

#define DEFAULT_ADMIN_QUEUE_REQUESTS (32)
#define DEFAULT_IO_QUEUE_REQUESTS (512)

#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000)

/* We want to fit submission and completion rings each in a single 2MB
 * hugepage to ensure physical address contiguity.
 */
#define MAX_IO_QUEUE_ENTRIES (VALUE_2MB / spdk_max( \
				sizeof(struct spdk_nvme_cmd), \
				sizeof(struct spdk_nvme_cpl)))

enum nvme_payload_type {
	NVME_PAYLOAD_TYPE_INVALID = 0,

	/** nvme_request::u.payload.contig_buffer is valid for this request */
	NVME_PAYLOAD_TYPE_CONTIG,

	/** nvme_request::u.sgl is valid for this request */
	NVME_PAYLOAD_TYPE_SGL,
};

/**
 * Descriptor for a request data payload.
 */
struct nvme_payload {
	/**
	 * Functions for retrieving physical addresses for scattered payloads.
	 */
	spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
	spdk_nvme_req_next_sge_cb next_sge_fn;

	/**
	 * If reset_sgl_fn == NULL, this is a contig payload, and contig_or_cb_arg contains the
	 * virtual memory address of a single virtually contiguous buffer.
	 *
	 * If reset_sgl_fn != NULL, this is an SGL payload, and contig_or_cb_arg contains the
	 * cb_arg that will be passed to the SGL callback functions.
	 */
	void *contig_or_cb_arg;

	/** Virtual memory address of a single virtually contiguous metadata buffer */
	void *md;
};

#define NVME_PAYLOAD_CONTIG(contig_, md_) \
	(struct nvme_payload) { \
		.reset_sgl_fn = NULL, \
		.next_sge_fn = NULL, \
		.contig_or_cb_arg = (contig_), \
		.md = (md_), \
	}

#define NVME_PAYLOAD_SGL(reset_sgl_fn_, next_sge_fn_, cb_arg_, md_) \
	(struct nvme_payload) { \
		.reset_sgl_fn = (reset_sgl_fn_), \
		.next_sge_fn = (next_sge_fn_), \
		.contig_or_cb_arg = (cb_arg_), \
		.md = (md_), \
	}

static inline enum nvme_payload_type
nvme_payload_type(const struct nvme_payload *payload)
{
	return payload->reset_sgl_fn ? NVME_PAYLOAD_TYPE_SGL : NVME_PAYLOAD_TYPE_CONTIG;
}
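
/*
 * Illustrative sketch (data_buf, md_buf and the SGL callback names are hypothetical):
 * the macros above build either flavor of payload, and nvme_payload_type()
 * discriminates them solely by whether reset_sgl_fn is set.
 *
 *	struct nvme_payload contig = NVME_PAYLOAD_CONTIG(data_buf, md_buf);
 *	struct nvme_payload sgl = NVME_PAYLOAD_SGL(reset_sgl_cb, next_sge_cb, cb_arg, NULL);
 *
 *	assert(nvme_payload_type(&contig) == NVME_PAYLOAD_TYPE_CONTIG);
 *	assert(nvme_payload_type(&sgl) == NVME_PAYLOAD_TYPE_SGL);
 */
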
struct nvme_error_cmd {
	bool do_not_submit;
	uint64_t timeout_tsc;
	uint32_t err_count;
	uint8_t opc;
	struct spdk_nvme_status status;
	TAILQ_ENTRY(nvme_error_cmd) link;
};

struct nvme_request {
	struct spdk_nvme_cmd cmd;

	uint8_t retries;

	bool timed_out;

	/**
	 * Number of child requests still outstanding for this
	 * request, which was split into multiple child requests.
	 */
	uint16_t num_children;

	/**
	 * Offset in bytes from the beginning of payload for this request.
	 * This is used for I/O commands that are split into multiple requests.
	 */
	uint32_t payload_offset;
	uint32_t md_offset;

	uint32_t payload_size;

	/**
	 * Timeout ticks for error injection requests. This could be extended in the
	 * future to support a per-request timeout feature.
	 */
	uint64_t timeout_tsc;

	/**
	 * Data payload for this request's command.
	 */
	struct nvme_payload payload;

	spdk_nvme_cmd_cb cb_fn;
	void *cb_arg;
	STAILQ_ENTRY(nvme_request) stailq;

	struct spdk_nvme_qpair *qpair;

	/*
	 * The value of spdk_get_ticks() when the request was submitted to the hardware.
	 * Only set if ctrlr->timeout_enabled is true.
	 */
	uint64_t submit_tick;

	/**
	 * The active admin request can be moved to a per-process pending
	 * list based on the saved pid to tell which process it belongs
	 * to. The cpl saves the original completion information which
	 * is used in the completion callback.
	 * NOTE: the two fields below are only used for admin requests.
	 */
	pid_t pid;
	struct spdk_nvme_cpl cpl;

	/**
	 * The following members should not be reordered with members
	 * above. These members are only needed when splitting
	 * requests, which is done rarely, and the driver is careful
	 * to not touch the following fields until a split operation is
	 * needed, to avoid touching an extra cacheline.
	 */

	/**
	 * Points to the outstanding child requests for a parent request.
	 * Only valid if a request was split into multiple child
	 * requests, and is not initialized for non-split requests.
	 */
	TAILQ_HEAD(, nvme_request) children;

	/**
	 * Linked-list pointers for a child request in its parent's list.
	 */
	TAILQ_ENTRY(nvme_request) child_tailq;

	/**
	 * Points to a parent request if part of a split request,
	 * NULL otherwise.
	 */
	struct nvme_request *parent;

	/**
	 * Completion status for a parent request. Initialized to all 0's
	 * (SUCCESS) before child requests are submitted. If a child
	 * request completes with error, the error status is copied here,
	 * to ensure that the parent request is also completed with error
	 * status once all child requests are completed.
	 */
	struct spdk_nvme_cpl parent_status;

	/**
	 * The user_cb_fn and user_cb_arg fields are used for holding the original
	 * callback data when using nvme_allocate_request_user_copy.
	 */
	spdk_nvme_cmd_cb user_cb_fn;
	void *user_cb_arg;
	void *user_buffer;
};
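
/*
 * Illustrative sketch of how an nvme_request is typically built and queued on the
 * admin path; the helpers used here are declared/defined later in this header, and
 * the opcode choice and error handling are only an example.
 *
 *	struct nvme_request *req;
 *
 *	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
 *	if (req == NULL) {
 *		return -ENOMEM;
 *	}
 *	req->cmd.opc = SPDK_NVME_OPC_KEEP_ALIVE;
 *	return nvme_ctrlr_submit_admin_request(ctrlr, req);
 */
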
struct nvme_completion_poll_status {
	struct spdk_nvme_cpl cpl;
	bool done;
};

struct nvme_async_event_request {
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_request *req;
	struct spdk_nvme_cpl cpl;
};

struct spdk_nvme_qpair {
	struct spdk_nvme_ctrlr *ctrlr;

	uint16_t id;

	uint8_t qprio;

	/*
	 * Members for handling I/O qpair deletion inside of a completion context.
	 * These are specifically defined as single bits, so that they do not
	 * push this data structure out to another cacheline.
	 */
	uint8_t in_completion_context : 1;
	uint8_t delete_after_completion_context : 1;

	/*
	 * Set when no deletion notification is needed. For example, the process
	 * that allocated this qpair exited unexpectedly.
	 */
	uint8_t no_deletion_notification_needed : 1;

	enum spdk_nvme_transport_type trtype;

	STAILQ_HEAD(, nvme_request) free_req;
	STAILQ_HEAD(, nvme_request) queued_req;

	/** Commands whose opcode is in this list will return an error */
	TAILQ_HEAD(, nvme_error_cmd) err_cmd_head;
	/** Requests in this list will return an error */
	STAILQ_HEAD(, nvme_request) err_req_head;

	/* List entry for spdk_nvme_ctrlr::active_io_qpairs */
	TAILQ_ENTRY(spdk_nvme_qpair) tailq;

	/* List entry for spdk_nvme_ctrlr_process::allocated_io_qpairs */
	TAILQ_ENTRY(spdk_nvme_qpair) per_process_tailq;

	struct spdk_nvme_ctrlr_process *active_proc;

	void *req_buf;
};

struct spdk_nvme_ns {
	struct spdk_nvme_ctrlr *ctrlr;
	uint32_t sector_size;

	/*
	 * Size of data transferred as part of each block,
	 * including metadata if FLBAS indicates the metadata is transferred
	 * as part of the data buffer at the end of each LBA.
	 */
	uint32_t extended_lba_size;

	uint32_t md_size;
	uint32_t pi_type;
	uint32_t sectors_per_max_io;
	uint32_t sectors_per_stripe;
	uint32_t id;
	uint16_t flags;

	/* Namespace Identification Descriptor List (CNS = 03h) */
	uint8_t id_desc_list[4096];
};
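
/*
 * Illustrative sketch (the admin command chosen here is arbitrary): the
 * nvme_completion_poll_status structure defined above is used together with
 * nvme_completion_poll_cb() and spdk_nvme_wait_for_completion(), both declared later
 * in this header, to issue an admin command and poll the admin queue until it completes.
 *
 *	struct nvme_completion_poll_status status;
 *	int rc;
 *
 *	status.done = false;
 *	rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_completion_poll_cb, &status);
 *	if (rc == 0) {
 *		rc = spdk_nvme_wait_for_completion(ctrlr->adminq, &status);
 *	}
 *	if (rc != 0 || spdk_nvme_cpl_is_error(&status.cpl)) {
 *		// handle failure
 *	}
 */
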
/**
 * State of struct spdk_nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {
	/**
	 * Wait before initializing the controller.
	 */
	NVME_CTRLR_STATE_INIT_DELAY,

	/**
	 * Controller has not been initialized yet.
	 */
	NVME_CTRLR_STATE_INIT,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

	/**
	 * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

	/**
	 * Enable the controller by writing CC.EN to 1.
	 */
	NVME_CTRLR_STATE_ENABLE,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

	/**
	 * Enable the Admin queue of the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE,

	/**
	 * Identify Controller command will be sent to the controller.
	 */
	NVME_CTRLR_STATE_IDENTIFY,

	/**
	 * Waiting for the Identify Controller command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,

	/**
	 * Set Number of Queues of the controller.
	 */
	NVME_CTRLR_STATE_SET_NUM_QUEUES,

	/**
	 * Waiting for the Set Number of Queues command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,

	/**
	 * Get Number of Queues of the controller.
	 */
	NVME_CTRLR_STATE_GET_NUM_QUEUES,

	/**
	 * Waiting for the Get Number of Queues command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES,

	/**
	 * Construct the Namespace data structures of the controller.
	 */
	NVME_CTRLR_STATE_CONSTRUCT_NS,

	/**
	 * Get the active Namespace list of the controller.
	 */
	NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,

	/**
	 * Get the Identify Namespace data structure for each NS.
	 */
	NVME_CTRLR_STATE_IDENTIFY_NS,

	/**
	 * Waiting for the Identify Namespace commands to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,

	/**
	 * Get the Identify Namespace Identification Descriptors.
	 */
	NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,

	/**
	 * Waiting for the Identify Namespace Identification
	 * Descriptors to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,

	/**
	 * Configure AER of the controller.
	 */
	NVME_CTRLR_STATE_CONFIGURE_AER,

	/**
	 * Waiting for the Configure AER command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,

	/**
	 * Set the supported log pages of the controller.
	 */
	NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,

	/**
	 * Set the supported features of the controller.
	 */
	NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,

	/**
	 * Set the Doorbell Buffer Config of the controller.
	 */
	NVME_CTRLR_STATE_SET_DB_BUF_CFG,

	/**
	 * Waiting for Doorbell Buffer Config to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,

	/**
	 * Set the Keep Alive Timeout of the controller.
	 */
	NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,

	/**
	 * Waiting for Set Keep Alive Timeout to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,

	/**
	 * Set the Host ID of the controller.
	 */
	NVME_CTRLR_STATE_SET_HOST_ID,

	/**
	 * Waiting for Set Host ID to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,

	/**
	 * Controller initialization has completed and the controller is ready.
	 */
	NVME_CTRLR_STATE_READY,

	/**
	 * Controller initialization encountered an error.
	 */
	NVME_CTRLR_STATE_ERROR
};

#define NVME_TIMEOUT_INFINITE UINT64_MAX

/*
 * Used to track properties for all processes accessing the controller.
 */
struct spdk_nvme_ctrlr_process {
	/** Whether it is the primary process */
	bool is_primary;

	/** Process ID */
	pid_t pid;

	/** Active admin requests to be completed */
	STAILQ_HEAD(, nvme_request) active_reqs;

	TAILQ_ENTRY(spdk_nvme_ctrlr_process) tailq;

	/** Per-process PCI device handle */
	struct spdk_pci_device *devhandle;

	/** Reference count tracking the number of attachments to this controller. */
	int ref;

	/** Allocated I/O qpairs */
	TAILQ_HEAD(, spdk_nvme_qpair) allocated_io_qpairs;

	spdk_nvme_aer_cb aer_cb_fn;
	void *aer_cb_arg;

	/**
	 * A function pointer to the timeout callback function.
	 */
	spdk_nvme_timeout_cb timeout_cb_fn;
	void *timeout_cb_arg;
	uint64_t timeout_ticks;
};
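
/*
 * Illustrative sketch: per-process state such as the registered timeout callback is
 * found by looking up the calling process's spdk_nvme_ctrlr_process via
 * spdk_nvme_ctrlr_get_current_process(), declared later in this header. The check
 * below only sketches how such a lookup is typically consumed.
 *
 *	struct spdk_nvme_ctrlr_process *active_proc;
 *
 *	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
 *	if (active_proc != NULL && active_proc->timeout_cb_fn != NULL) {
 *		// dispatch the timeout notification registered by this process
 *	}
 */
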
/*
 * One of these per allocated PCI device.
 */
struct spdk_nvme_ctrlr {
	/* Hot data (accessed in I/O path) starts here. */

	/** Array of namespaces indexed by nsid - 1 */
	struct spdk_nvme_ns *ns;

	struct spdk_nvme_transport_id trid;

	uint32_t num_ns;

	bool is_removed;

	bool is_resetting;

	bool is_failed;

	bool is_shutdown;

	bool timeout_enabled;

	uint16_t max_sges;

	uint16_t cntlid;

	/** Controller support flags */
	uint64_t flags;

	/* Cold data (not accessed in normal I/O path) is after this point. */

	union spdk_nvme_cap_register cap;
	union spdk_nvme_vs_register vs;

	enum nvme_ctrlr_state state;
	uint64_t state_timeout_tsc;

	uint64_t next_keep_alive_tick;
	uint64_t keep_alive_interval_ticks;

	TAILQ_ENTRY(spdk_nvme_ctrlr) tailq;

	/** All the log pages supported */
	bool log_page_supported[256];

	/** All the features supported */
	bool feature_supported[256];

	/** Maximum I/O size in bytes */
	uint32_t max_xfer_size;

	/** Minimum page size supported by this controller in bytes */
	uint32_t min_page_size;

	/** Selected memory page size for this controller in bytes */
	uint32_t page_size;

	uint32_t num_aers;
	struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];

	/** Guards access to the controller itself, including admin queues */
	pthread_mutex_t ctrlr_lock;

	struct spdk_nvme_qpair *adminq;

	/** Shadow doorbell buffer */
	uint32_t *shadow_doorbell;
	/** Eventidx buffer */
	uint32_t *eventidx;

	/**
	 * Identify Controller data.
	 */
	struct spdk_nvme_ctrlr_data cdata;

	/**
	 * Keep track of active namespaces.
	 */
	uint32_t *active_ns_list;

	/**
	 * Array of Identify Namespace data.
	 *
	 * Stored separately from ns since nsdata should not normally be accessed during I/O.
	 */
	struct spdk_nvme_ns_data *nsdata;

	struct spdk_bit_array *free_io_qids;
	TAILQ_HEAD(, spdk_nvme_qpair) active_io_qpairs;

	struct spdk_nvme_ctrlr_opts opts;

	uint64_t quirks;

	/* Extra sleep time during controller initialization */
	uint64_t sleep_timeout_tsc;

	/** Tracks all the processes that manage this controller */
	TAILQ_HEAD(, spdk_nvme_ctrlr_process) active_procs;

	STAILQ_HEAD(, nvme_request) queued_aborts;
	uint32_t outstanding_aborts;
};
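
/*
 * Illustrative sketch: ctrlr_lock guards the controller and its admin queue across
 * threads and processes, and callers are expected to take it through the
 * nvme_robust_mutex_* wrappers defined later in this header rather than calling
 * pthread directly, so that a lock abandoned by a dead process can be recovered.
 *
 *	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
 *	// ... touch the admin queue or other shared controller state ...
 *	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
 */
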
struct spdk_nvme_probe_ctx {
	struct spdk_nvme_transport_id trid;
	void *cb_ctx;
	spdk_nvme_probe_cb probe_cb;
	spdk_nvme_attach_cb attach_cb;
	spdk_nvme_remove_cb remove_cb;
	TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs;
};

struct nvme_driver {
	pthread_mutex_t lock;

	/** Multi-process shared attached controller list */
	TAILQ_HEAD(, spdk_nvme_ctrlr) shared_attached_ctrlrs;

	bool initialized;
	struct spdk_uuid default_extended_host_id;
};

extern struct nvme_driver *g_spdk_nvme_driver;

int nvme_driver_init(void);

#define nvme_delay usleep

static inline bool
nvme_qpair_is_admin_queue(struct spdk_nvme_qpair *qpair)
{
	return qpair->id == 0;
}

static inline bool
nvme_qpair_is_io_queue(struct spdk_nvme_qpair *qpair)
{
	return qpair->id != 0;
}

static inline int
nvme_robust_mutex_lock(pthread_mutex_t *mtx)
{
	int rc = pthread_mutex_lock(mtx);

#ifndef __FreeBSD__
	if (rc == EOWNERDEAD) {
		rc = pthread_mutex_consistent(mtx);
	}
#endif

	return rc;
}

static inline int
nvme_robust_mutex_unlock(pthread_mutex_t *mtx)
{
	return pthread_mutex_unlock(mtx);
}

/* Admin functions */
int nvme_ctrlr_cmd_identify(struct spdk_nvme_ctrlr *ctrlr,
		uint8_t cns, uint16_t cntid, uint32_t nsid,
		void *payload, size_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_num_queues(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t num_queues, spdk_nvme_cmd_cb cb_fn,
		void *cb_arg);
int nvme_ctrlr_cmd_get_num_queues(struct spdk_nvme_ctrlr *ctrlr,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_async_event_config(struct spdk_nvme_ctrlr *ctrlr,
		union spdk_nvme_feat_async_event_configuration config,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_host_id(struct spdk_nvme_ctrlr *ctrlr, void *host_id, uint32_t host_id_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr,
		uint64_t prp1, uint64_t prp2,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, spdk_nvme_cmd_cb cb_fn,
		void *cb_arg);
int nvme_ctrlr_cmd_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		struct spdk_nvme_format *format, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_commit(struct spdk_nvme_ctrlr *ctrlr,
		const struct spdk_nvme_fw_commit *fw_commit,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t size, uint32_t offset, void *payload,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, uint16_t spsp,
		uint8_t nssf, void *payload, uint32_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
		uint16_t spsp, uint8_t nssf, void *payload,
		uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);
int spdk_nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status);
int spdk_nvme_wait_for_completion_robust_lock(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex);
int spdk_nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		uint64_t timeout_in_secs);

struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr,
		pid_t pid);
struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
void nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr);
struct spdk_pci_device *nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle);

int nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove);
int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
		struct nvme_request *req);
int nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap);
int nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs);
int nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz);
void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
		const union spdk_nvme_vs_register *vs);
int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
		struct spdk_nvme_ctrlr *ctrlr,
		enum spdk_nvme_qprio qprio,
		uint32_t num_requests);
void nvme_qpair_deinit(struct spdk_nvme_qpair *qpair);
void nvme_qpair_enable(struct spdk_nvme_qpair *qpair);
void nvme_qpair_disable(struct spdk_nvme_qpair *qpair);
int nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req);

int nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_set_identify_data(struct spdk_nvme_ns *ns);
int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id,
		struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_destruct(struct spdk_nvme_ns *ns);

int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);
int nvme_fabric_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);
int nvme_fabric_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);
int nvme_fabric_ctrlr_discover(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_probe_ctx *probe_ctx);
int nvme_fabric_qpair_connect(struct spdk_nvme_qpair *qpair, uint32_t num_entries);

static inline struct nvme_request *
nvme_allocate_request(struct spdk_nvme_qpair *qpair,
		const struct nvme_payload *payload, uint32_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;

	req = STAILQ_FIRST(&qpair->free_req);
	if (req == NULL) {
		return req;
	}

	STAILQ_REMOVE_HEAD(&qpair->free_req, stailq);

	/*
	 * Only memset/zero fields that need it. All other fields
	 * will be initialized appropriately either later in this
	 * function, or before they are needed later in the
	 * submission path. For example, the children
	 * TAILQ_ENTRY and following members are
	 * only used as part of I/O splitting, so we avoid
	 * memsetting them until it is actually needed.
	 * They will be initialized in nvme_request_add_child()
	 * if the request is split.
	 */
	memset(req, 0, offsetof(struct nvme_request, payload_size));

	req->cb_fn = cb_fn;
	req->cb_arg = cb_arg;
	req->payload = *payload;
	req->payload_size = payload_size;
	req->qpair = qpair;
	req->pid = g_spdk_nvme_pid;

	return req;
}

static inline struct nvme_request *
nvme_allocate_request_contig(struct spdk_nvme_qpair *qpair,
		void *buffer, uint32_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_payload payload;

	payload = NVME_PAYLOAD_CONTIG(buffer, NULL);

	return nvme_allocate_request(qpair, &payload, payload_size, cb_fn, cb_arg);
}

static inline struct nvme_request *
nvme_allocate_request_null(struct spdk_nvme_qpair *qpair, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	return nvme_allocate_request_contig(qpair, NULL, 0, cb_fn, cb_arg);
}

struct nvme_request *nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
		void *buffer, uint32_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg, bool host_to_controller);

static inline void
nvme_complete_request(spdk_nvme_cmd_cb cb_fn, void *cb_arg,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_cpl err_cpl;
	struct nvme_error_cmd *cmd;

	/* Error injection at the completion path:
	 * only inject errors for successfully completed commands.
	 */
	if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head) &&
			!spdk_nvme_cpl_is_error(cpl))) {
		TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) {

			if (cmd->do_not_submit) {
				continue;
			}

			if ((cmd->opc == req->cmd.opc) && cmd->err_count) {

				err_cpl = *cpl;
				err_cpl.status.sct = cmd->status.sct;
				err_cpl.status.sc = cmd->status.sc;

				cpl = &err_cpl;
				cmd->err_count--;
				break;
			}
		}
	}

	if (cb_fn) {
		cb_fn(cb_arg, cpl);
	}
}

static inline void
nvme_free_request(struct nvme_request *req)
{
	assert(req != NULL);
	assert(req->num_children == 0);
	assert(req->qpair != NULL);

	STAILQ_INSERT_HEAD(&req->qpair->free_req, req, stailq);
}
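
/*
 * Illustrative sketch: nvme_complete_request() above implements the error-injection
 * hook. An nvme_error_cmd entry linked on qpair->err_cmd_head overrides the status of
 * an otherwise successful completion for a matching opcode. The entry below is only an
 * example of how such an entry might be populated (allocation and list linkage are
 * assumed to happen elsewhere; the status codes come from spdk/nvme_spec.h).
 *
 *	entry->opc = SPDK_NVME_OPC_READ;
 *	entry->do_not_submit = false;
 *	entry->err_count = 1;	// fail exactly one matching command
 *	entry->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
 *	entry->status.sc = SPDK_NVME_SC_UNRECOVERED_READ_ERROR;
 */
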
void nvme_request_remove_child(struct nvme_request *parent, struct nvme_request *child);
int nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
		struct spdk_nvme_ctrlr_process *active_proc, uint64_t now_tick);
uint64_t nvme_get_quirks(const struct spdk_pci_id *id);

int nvme_robust_mutex_init_shared(pthread_mutex_t *mtx);
int nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx);

const char *spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status);
bool nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl);
void nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd);
void nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl);

struct spdk_nvme_ctrlr *spdk_nvme_get_ctrlr_by_trid_unsafe(
	const struct spdk_nvme_transport_id *trid);

/* Transport specific functions */
#define DECLARE_TRANSPORT(name) \
	struct spdk_nvme_ctrlr *nvme_ ## name ## _ctrlr_construct(const struct spdk_nvme_transport_id *trid, const struct spdk_nvme_ctrlr_opts *opts, \
			void *devhandle); \
	int nvme_ ## name ## _ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); \
	int nvme_ ## name ## _ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect); \
	int nvme_ ## name ## _ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr); \
	int nvme_ ## name ## _ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); \
	int nvme_ ## name ## _ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); \
	int nvme_ ## name ## _ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value); \
	int nvme_ ## name ## _ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value); \
	uint32_t nvme_ ## name ## _ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr); \
	uint16_t nvme_ ## name ## _ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr); \
	struct spdk_nvme_qpair *nvme_ ## name ## _ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, const struct spdk_nvme_io_qpair_opts *opts); \
	void *nvme_ ## name ## _ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size); \
	int nvme_ ## name ## _ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size); \
	int nvme_ ## name ## _ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_enable(struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_disable(struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_reset(struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_fail(struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req); \
	int32_t nvme_ ## name ## _qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions);

DECLARE_TRANSPORT(transport) /* generic transport dispatch functions */
DECLARE_TRANSPORT(pcie)
DECLARE_TRANSPORT(tcp)
#ifdef SPDK_CONFIG_RDMA
DECLARE_TRANSPORT(rdma)
#endif

#undef DECLARE_TRANSPORT
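
/*
 * Illustrative note: each DECLARE_TRANSPORT(name) invocation above expands to one
 * prototype per transport entry point with "name" spliced into the symbol. For
 * example, DECLARE_TRANSPORT(pcie) declares, among others:
 *
 *	int nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);
 *
 * The DECLARE_TRANSPORT(transport) entry declares the generic nvme_transport_*
 * dispatch functions that route to the appropriate transport at runtime.
 */
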
/*
 * The ref-counting functions below must be called with the global
 * driver lock held for the multi-process condition.
 * Within these functions, the per-ctrlr ctrlr_lock is also
 * acquired for the multi-thread condition.
 */
void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);

static inline bool
_is_page_aligned(uint64_t address, uint64_t page_size)
{
	return (address & (page_size - 1)) == 0;
}

#endif /* __NVME_INTERNAL_H__ */