/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvme_internal.h"

#include "spdk/env.h"
#include "spdk/string.h"

static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
		struct nvme_async_event_request *aer);

static int
nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
					      &cc->raw);
}

static int
nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
					      &csts->raw);
}

int
nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
{
	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
					      &cap->raw);
}

int
nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
					      &vs->raw);
}

static int
nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
{
	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
					      cc->raw);
}

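/*
 * spdk_nvme_ctrlr_get_default_ctrlr_opts() fills in defaults only for the fields
 * that fit within the caller-provided opts_size, so an application compiled against
 * an older definition of struct spdk_nvme_ctrlr_opts keeps working when new fields
 * are appended to the structure.  A minimal caller sketch (illustrative only):
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.num_io_queues = 4;
 */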
void
spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	char host_id_str[SPDK_UUID_STRING_LEN];

	assert(opts);

	memset(opts, 0, opts_size);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size

	if (FIELD_OK(num_io_queues)) {
		opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
	}

	if (FIELD_OK(use_cmb_sqs)) {
		opts->use_cmb_sqs = true;
	}

	if (FIELD_OK(arb_mechanism)) {
		opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
	}

	if (FIELD_OK(keep_alive_timeout_ms)) {
		opts->keep_alive_timeout_ms = 10 * 1000;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
	}

	if (FIELD_OK(host_id)) {
		memset(opts->host_id, 0, sizeof(opts->host_id));
	}

	if (nvme_driver_init() == 0) {
		if (FIELD_OK(extended_host_id)) {
			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
			       sizeof(opts->extended_host_id));
		}

		if (FIELD_OK(hostnqn)) {
			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
					    &g_spdk_nvme_driver->default_extended_host_id);
			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
		}
	}

	if (FIELD_OK(src_addr)) {
		memset(opts->src_addr, 0, sizeof(opts->src_addr));
	}

	if (FIELD_OK(src_svcid)) {
		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
	}

	if (FIELD_OK(command_set)) {
		opts->command_set = SPDK_NVME_CC_CSS_NVM;
	}
#undef FIELD_OK
}

/**
 * This function will be called when the process allocates the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
		qpair->active_proc = active_proc;
	}
}

/**
 * This function will be called when the process frees the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_qpair *active_qpair, *tmp_qpair;

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (!active_proc) {
		return;
	}

	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
			   per_process_tailq, tmp_qpair) {
		if (active_qpair == qpair) {
			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
				     active_qpair, per_process_tailq);

			break;
		}
	}
}

void
spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_io_qpair_opts *opts,
		size_t opts_size)
{
	assert(ctrlr);

	assert(opts);

	memset(opts, 0, opts_size);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size

	if (FIELD_OK(qprio)) {
		opts->qprio = SPDK_NVME_QPRIO_URGENT;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = ctrlr->opts.io_queue_size;
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
	}

#undef FIELD_OK
}

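/*
 * Typical I/O qpair lifecycle (illustrative sketch only; error handling and the
 * actual I/O submission calls are omitted):
 *
 *	struct spdk_nvme_qpair *qpair;
 *
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
 *	... submit I/O on qpair ...
 *	spdk_nvme_qpair_process_completions(qpair, 0);
 *	spdk_nvme_ctrlr_free_io_qpair(qpair);
 */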
struct spdk_nvme_qpair *
spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			       const struct spdk_nvme_io_qpair_opts *user_opts,
			       size_t opts_size)
{
	uint32_t qid;
	struct spdk_nvme_qpair *qpair;
	union spdk_nvme_cc_register cc;
	struct spdk_nvme_io_qpair_opts opts;

	if (!ctrlr) {
		return NULL;
	}

	/*
	 * Get the default options, then overwrite them with the user-provided options
	 * up to opts_size.
	 *
	 * This allows for extensions of the opts structure without breaking
	 * ABI compatibility.
	 */
	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	if (user_opts) {
		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
	}

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		SPDK_ERRLOG("get_cc failed\n");
		return NULL;
	}

	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
	if ((opts.qprio & 3) != opts.qprio) {
		return NULL;
	}

	/*
	 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
	 * default round robin arbitration method.
	 */
	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) {
		SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
		return NULL;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	/*
	 * Get the first available I/O queue ID.
	 */
	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
	if (qid > ctrlr->opts.num_io_queues) {
		SPDK_ERRLOG("No free I/O queue IDs\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts);
	if (qpair == NULL) {
		SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}
	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

	nvme_ctrlr_proc_add_io_qpair(qpair);

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
		spdk_delay_us(100);
	}

	return qpair;
}

int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;

	if (qpair == NULL) {
		return 0;
	}

	ctrlr = qpair->ctrlr;

	if (qpair->in_completion_context) {
		/*
		 * There are many cases where it is convenient to delete an io qpair in the context
		 * of that qpair's completion routine. To handle this properly, set a flag here
		 * so that the completion routine will perform an actual delete after the context
		 * unwinds.
		 */
		qpair->delete_after_completion_context = 1;
		return 0;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_proc_remove_io_qpair(qpair);

	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
	spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);

	if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return -1;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}

static void
nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_intel_log_page_directory *log_page_directory)
{
	if (log_page_directory == NULL) {
		return;
	}

	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
		return;
	}

	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;

	if (log_page_directory->read_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
	}
	if (log_page_directory->write_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
	}
	if (log_page_directory->temperature_statistics_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
	}
	if (log_page_directory->smart_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
	}
	if (log_page_directory->marketing_description_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
	}
}

static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	uint64_t phys_addr = 0;
	struct nvme_completion_poll_status status;
	struct spdk_nvme_intel_log_page_directory *log_page_directory;

	log_page_directory = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
					      64, &phys_addr);
	if (log_page_directory == NULL) {
		SPDK_ERRLOG("could not allocate log_page_directory\n");
		return -ENXIO;
	}

	spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, SPDK_NVME_GLOBAL_NS_TAG,
					 log_page_directory, sizeof(struct spdk_nvme_intel_log_page_directory), 0,
					 nvme_completion_poll_cb, &status);
	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		spdk_dma_free(log_page_directory);
		SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n");
		return -ENXIO;
	}

	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
	spdk_dma_free(log_page_directory);
	return 0;
}

static void
nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
	/* Mandatory pages */
	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
	if (ctrlr->cdata.lpa.celp) {
		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
	}
}

static void
nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

static void
nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
	/* Mandatory features */
	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
	/* Optional features */
	if (ctrlr->cdata.vwc.present) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	}
	if (ctrlr->cdata.apsta.supported) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
	}
	if (ctrlr->cdata.hmpre) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_supported_features(ctrlr);
	}
}

void
nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
{
	/*
	 * Set the flag here and leave the actual failing of the qpairs to
	 * spdk_nvme_qpair_process_completions().
	 */
	if (hot_remove) {
		ctrlr->is_removed = true;
	}
	ctrlr->is_failed = true;
	SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr);
}

static void
nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	uint32_t ms_waited = 0;
	uint32_t shutdown_timeout_ms;

	if (ctrlr->is_removed) {
		return;
	}

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		SPDK_ERRLOG("get_cc() failed\n");
		return;
	}

	cc.bits.shn = SPDK_NVME_SHN_NORMAL;

	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
		SPDK_ERRLOG("set_cc() failed\n");
		return;
	}

	/*
	 * The NVMe specification defines RTD3E to be the time from setting
	 * SHN = 1 until the controller sets SHST = 10b.
	 * If the device doesn't report RTD3 entry latency, or if it
	 * reports RTD3 entry latency less than 10 seconds, pick
	 * 10 seconds as a reasonable amount of time to
	 * wait before proceeding.
	 */
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
	shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000;
	shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms);

	do {
		if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
			SPDK_ERRLOG("get_csts() failed\n");
			return;
		}

		if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n",
				      ms_waited);
			return;
		}

		nvme_delay(1000);
		ms_waited++;
	} while (ms_waited < shutdown_timeout_ms);

	SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms);
}

static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	int rc;

	rc = nvme_transport_ctrlr_enable(ctrlr);
	if (rc != 0) {
		SPDK_ERRLOG("transport ctrlr_enable failed\n");
		return rc;
	}

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		SPDK_ERRLOG("get_cc() failed\n");
		return -EIO;
	}

	if (cc.bits.en != 0) {
		SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__);
		return -EINVAL;
	}

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/* Page size is 2 ^ (12 + mps). */
	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
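	/*
	 * Worked example (illustrative): with the common 4 KiB page size,
	 * spdk_u32log2(4096) == 12, so CC.MPS is set to 0 and the controller
	 * uses a memory page size of 2^(12 + 0) = 4096 bytes.
	 */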

	if (ctrlr->cap.bits.css == 0) {
		SPDK_INFOLOG(SPDK_LOG_NVME,
			     "Drive reports no command sets supported. Assuming NVM is supported.\n");
		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
	}

	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n",
			      ctrlr->opts.command_set, ctrlr->cap.bits.css);
		return -EINVAL;
	}

	cc.bits.css = ctrlr->opts.command_set;

	switch (ctrlr->opts.arb_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
		break;
	case SPDK_NVME_CC_AMS_WRR:
		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	case SPDK_NVME_CC_AMS_VS:
		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	default:
		return -EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;

	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
		SPDK_ERRLOG("set_cc() failed\n");
		return -EIO;
	}

	return 0;
}

#ifdef DEBUG
static const char *
nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
{
	switch (state) {
	case NVME_CTRLR_STATE_INIT:
		return "init";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		return "disable and wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		return "disable and wait for CSTS.RDY = 0";
	case NVME_CTRLR_STATE_ENABLE:
		return "enable controller by writing CC.EN = 1";
	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		return "wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_READY:
		return "ready";
	}
	return "unknown";
}
#endif /* DEBUG */

static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		     uint64_t timeout_in_ms)
{
	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n",
			      nvme_ctrlr_state_string(ctrlr->state));
		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
	} else {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
			      nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
		ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000;
	}
}

static void
nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr->shadow_doorbell) {
		spdk_dma_free(ctrlr->shadow_doorbell);
		ctrlr->shadow_doorbell = NULL;
	}

	if (ctrlr->eventidx) {
		spdk_dma_free(ctrlr->eventidx);
		ctrlr->eventidx = NULL;
	}
}

static int
nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;
	struct nvme_completion_poll_status status;
	uint64_t prp1, prp2;

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		return 0;
	}

	/* only 1 page size for doorbell buffer */
	ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size,
				 &prp1);
	if (ctrlr->shadow_doorbell == NULL) {
		return -1;
	}

	ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, &prp2);
	if (ctrlr->eventidx == NULL) {
		goto error;
	}

	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
			nvme_completion_poll_cb, &status);
	if (rc != 0) {
		goto error;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		goto error;
	}

	SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n",
		     ctrlr->trid.traddr);

	return 0;

error:
	nvme_ctrlr_free_doorbell_buffer(ctrlr);
	return -1;
}

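/**
 * Reset the controller by disabling it and running the initialization state
 * machine again: queued abort requests are freed, the admin and I/O qpairs are
 * disabled, the doorbell buffer config is released, and the controller is
 * re-enabled before the I/O qpairs are reinitialized. On any failure the
 * controller is marked failed and -1 is returned.
 */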
int
spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	struct spdk_nvme_qpair *qpair;
	struct nvme_request *req, *tmp;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	if (ctrlr->is_resetting || ctrlr->is_failed) {
		/*
		 * Controller is already resetting or has failed. Return
		 * immediately since there is no need to kick off another
		 * reset in these cases.
		 */
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return 0;
	}

	ctrlr->is_resetting = true;

	SPDK_NOTICELOG("resetting controller\n");

	/* Free all of the queued abort requests */
	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
		nvme_free_request(req);
		ctrlr->outstanding_aborts--;
	}

	/* Disable all queues before disabling the controller hardware. */
	nvme_qpair_disable(ctrlr->adminq);
	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
		nvme_qpair_disable(qpair);
	}

	/* Doorbell buffer config is invalid during reset */
	nvme_ctrlr_free_doorbell_buffer(ctrlr);

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_process_init(ctrlr) != 0) {
			SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__);
			nvme_ctrlr_fail(ctrlr, false);
			rc = -1;
			break;
		}
	}

	if (!ctrlr->is_failed) {
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) {
				nvme_ctrlr_fail(ctrlr, false);
				rc = -1;
			}
		}
	}

	ctrlr->is_resetting = false;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return rc;
}

static int
nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	int rc;

	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
				     &ctrlr->cdata, sizeof(ctrlr->cdata),
				     nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("nvme_identify_controller failed!\n");
		return -ENXIO;
	}

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 * controller supports.
	 */
	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
	if (ctrlr->cdata.mdts > 0) {
		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		ctrlr->cntlid = ctrlr->cdata.cntlid;
	} else {
		/*
		 * Fabrics controllers should already have CNTLID from the Connect command.
		 *
		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
		 * trust the one from Connect.
		 */
		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME,
				      "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
				      ctrlr->cdata.cntlid, ctrlr->cntlid);
		}
	}

	return 0;
}

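/*
 * Each Identify Active Namespace List (CNS 0x02) page holds up to 1024 NSIDs
 * (one struct spdk_nvme_ns_list). For example (illustrative), a controller with
 * num_ns == 3000 needs ceil(3000 / 1024) == 3 pages, which is what the
 * num_pages calculation below computes.
 */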
int
nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	int rc;
	uint32_t i;
	uint32_t num_pages;
	uint32_t next_nsid = 0;
	uint32_t *new_ns_list = NULL;

	/*
	 * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list)
	 */
	num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
	new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
				       NULL);
	if (!new_ns_list) {
		SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
		return -ENOMEM;
	}

	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
		/*
		 * Iterate through the pages and fetch each chunk of 1024 namespaces until
		 * there are no more active namespaces
		 */
		for (i = 0; i < num_pages; i++) {
			rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid,
						     &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list),
						     nvme_completion_poll_cb, &status);
			if (rc != 0) {
				goto fail;
			}
			if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
				SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n");
				rc = -ENXIO;
				goto fail;
			}
			next_nsid = new_ns_list[1024 * i + 1023];
			if (next_nsid == 0) {
				/*
				 * No more active namespaces found, no need to fetch additional chunks
				 */
				break;
			}
		}

	} else {
		/*
		 * Controller doesn't support active ns list CNS 0x02 so dummy up
		 * an active ns list
		 */
		for (i = 0; i < ctrlr->num_ns; i++) {
			new_ns_list[i] = i + 1;
		}
	}

	/*
	 * Now that the list is properly set up, we can swap it into the ctrlr and
	 * free up the previous one.
	 */
	spdk_dma_free(ctrlr->active_ns_list);
	ctrlr->active_ns_list = new_ns_list;

	return 0;
fail:
	spdk_dma_free(new_ns_list);
	return rc;
}

static int
nvme_ctrlr_set_num_qpairs(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	uint32_t cq_allocated, sq_allocated, min_allocated, i;
	int rc;

	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
		SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
			       ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
	} else if (ctrlr->opts.num_io_queues < 1) {
		SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
		ctrlr->opts.num_io_queues = 1;
	}

	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
					   nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
	}

	/* Obtain the number of queues allocated using Get Features. */
	rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("Get Features - Number of Queues failed!\n");
		ctrlr->opts.num_io_queues = 0;
	} else {
		/*
		 * Data in cdw0 is 0-based.
		 * Lower 16-bits indicate number of submission queues allocated.
		 * Upper 16-bits indicate number of completion queues allocated.
		 */
		sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
		cq_allocated = (status.cpl.cdw0 >> 16) + 1;
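		/*
		 * Example (illustrative): cdw0 == 0x00030007 decodes to
		 * sq_allocated == 8 and cq_allocated == 4, so with the 1:1
		 * mapping below at most 4 I/O queue pairs will be used.
		 */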

		/*
		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
		 * submission and completion queues.
		 */
		min_allocated = spdk_min(sq_allocated, cq_allocated);

		/* Set number of queues to be minimum of requested and actually allocated. */
		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
	}

	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
	if (ctrlr->free_io_qids == NULL) {
		return -ENOMEM;
	}

	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
	spdk_bit_array_clear(ctrlr->free_io_qids, 0);
	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
		spdk_bit_array_set(ctrlr->free_io_qids, i);
	}

	return 0;
}

static int
nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	uint32_t keep_alive_interval_ms;
	int rc;

	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
		return 0;
	}

	if (ctrlr->cdata.kas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
		ctrlr->opts.keep_alive_timeout_ms = 0;
		return 0;
	}

	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
					     nvme_completion_poll_cb, &status);
	if (rc != 0) {
		SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
		ctrlr->opts.keep_alive_timeout_ms = 0;
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
			    status.cpl.status.sc, status.cpl.status.sct);
		ctrlr->opts.keep_alive_timeout_ms = 0;
		return -ENXIO;
	}

	if (ctrlr->opts.keep_alive_timeout_ms != status.cpl.cdw0) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n",
			      status.cpl.cdw0);
	}

	ctrlr->opts.keep_alive_timeout_ms = status.cpl.cdw0;

	keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
	if (keep_alive_interval_ms == 0) {
		keep_alive_interval_ms = 1;
	}
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);

	ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
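	/*
	 * Example (illustrative): with the default 10000 ms keep alive timeout,
	 * keep_alive_interval_ms is 5000, so a Keep Alive command is sent roughly
	 * every 5 seconds, i.e. half the timeout, to leave margin for delays.
	 */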

	/* Schedule the first Keep Alive to be sent as soon as possible. */
	ctrlr->next_keep_alive_tick = spdk_get_ticks();

	return 0;
}

static int
nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	uint8_t *host_id;
	uint32_t host_id_size;
	int rc;

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		/*
		 * NVMe-oF sends the host ID during Connect and doesn't allow
		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
		 */
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n");
		return 0;
	}

	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n");
		host_id = ctrlr->opts.extended_host_id;
		host_id_size = sizeof(ctrlr->opts.extended_host_id);
	} else {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n");
		host_id = ctrlr->opts.host_id;
		host_id_size = sizeof(ctrlr->opts.host_id);
	}

	/* If the user specified an all-zeroes host identifier, don't send the command. */
	if (spdk_mem_all_zero(host_id, host_id_size)) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME,
			      "User did not specify host ID - not sending Set Features - Host ID\n");
		return 0;
	}

	SPDK_TRACEDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size);

	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc);
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
			     status.cpl.status.sc, status.cpl.status.sct);
		/*
		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
		 * is optional.
		 */
		return 0;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n");
	return 0;
}

static void
nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr->ns) {
		uint32_t i, num_ns = ctrlr->num_ns;

		for (i = 0; i < num_ns; i++) {
			nvme_ns_destruct(&ctrlr->ns[i]);
		}

		spdk_dma_free(ctrlr->ns);
		ctrlr->ns = NULL;
		ctrlr->num_ns = 0;
	}

	if (ctrlr->nsdata) {
		spdk_dma_free(ctrlr->nsdata);
		ctrlr->nsdata = NULL;
	}

	spdk_dma_free(ctrlr->active_ns_list);
	ctrlr->active_ns_list = NULL;
}

static int
nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t i, nn = ctrlr->cdata.nn;
	struct spdk_nvme_ns_data *nsdata;

	if (nvme_ctrlr_identify_active_ns(ctrlr)) {
		return -1;
	}

	for (i = 0; i < nn; i++) {
		struct spdk_nvme_ns *ns = &ctrlr->ns[i];
		uint32_t nsid = i + 1;
		nsdata = &ctrlr->nsdata[nsid - 1];

		if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
			if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
				continue;
			}
		}

		if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
			nvme_ns_destruct(ns);
		}
	}

	return 0;
}

static int
nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nn = ctrlr->cdata.nn;
	uint64_t phys_addr = 0;

	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
	 * so check if we need to reallocate.
	 */
	if (nn != ctrlr->num_ns) {
		nvme_ctrlr_destruct_namespaces(ctrlr);

		if (nn == 0) {
			SPDK_WARNLOG("controller has 0 namespaces\n");
			return 0;
		}

		ctrlr->ns = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64,
					     &phys_addr);
		if (ctrlr->ns == NULL) {
			goto fail;
		}

		ctrlr->nsdata = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
						 &phys_addr);
		if (ctrlr->nsdata == NULL) {
			goto fail;
		}

		ctrlr->num_ns = nn;
	}

	if (nvme_ctrlr_update_namespaces(ctrlr)) {
		goto fail;
	}
	return 0;

fail:
	nvme_ctrlr_destruct_namespaces(ctrlr);
	return -1;
}

static void
nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_async_event_request *aer = arg;
	struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr;
	struct spdk_nvme_ctrlr_process *active_proc;
	union spdk_nvme_async_event_completion event;

	if (cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
		/*
		 * This is simulated when controller is being shut down, to
		 * effectively abort outstanding asynchronous event requests
		 * and make sure all memory is freed. Do not repost the
		 * request in this case.
		 */
		return;
	}

	event.raw = cpl->cdw0;
	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
		nvme_ctrlr_update_namespaces(ctrlr);
	}

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc && active_proc->aer_cb_fn) {
		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
	}

	/*
	 * Repost another asynchronous event request to replace the one
	 * that just completed.
	 */
	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
		/*
		 * We can't do anything to recover from a failure here,
		 * so just print a warning message and leave the AER unsubmitted.
		 */
		SPDK_ERRLOG("resubmitting AER failed!\n");
	}
}

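/*
 * Allocate a data-less admin request for an Asynchronous Event Request and
 * submit it to the admin queue. The completion is handled by
 * nvme_ctrlr_async_event_cb(), which reposts the AER after each completion.
 */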
static int
nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
				    struct nvme_async_event_request *aer)
{
	struct nvme_request *req;

	aer->ctrlr = ctrlr;
	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
	aer->req = req;
	if (req == NULL) {
		return -1;
	}

	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

static int
_nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_feat_async_event_configuration config;
	struct nvme_completion_poll_status status;
	int rc;

	config.raw = 0;
	config.bits.crit_warn.bits.available_spare = 1;
	config.bits.crit_warn.bits.temperature = 1;
	config.bits.crit_warn.bits.device_reliability = 1;
	config.bits.crit_warn.bits.read_only = 1;
	config.bits.crit_warn.bits.volatile_memory_backup = 1;

	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
		if (ctrlr->cdata.oaes.ns_attribute_notices) {
			config.bits.ns_attr_notice = 1;
		}
		if (ctrlr->cdata.oaes.fw_activation_notices) {
			config.bits.fw_activation_notice = 1;
		}
	}
	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
		config.bits.telemetry_log_notice = 1;
	}

	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		return -ENXIO;
	}

	return 0;
}

static int
nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_async_event_request *aer;
	uint32_t i;
	int rc;

	rc = _nvme_ctrlr_configure_aer(ctrlr);
	if (rc != 0) {
		SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n");
		return 0;
	}

	/* aerl is a zero-based value, so we need to add 1 here. */
	ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));

	for (i = 0; i < ctrlr->num_aers; i++) {
		aer = &ctrlr->aer[i];
		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
			SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
			return -1;
		}
	}

	return 0;
}

struct spdk_nvme_ctrlr_process *
spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
		if (active_proc->pid == pid) {
			return active_proc;
		}
	}

	return NULL;
}

struct spdk_nvme_ctrlr_process *
spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
{
	return spdk_nvme_ctrlr_get_process(ctrlr, getpid());
}

/**
 * This function will be called when a process is using the controller.
 *  1. For the primary process, it is called when constructing the controller.
 *  2. For the secondary process, it is called at probing the controller.
 * Note: it checks whether the process has already been added for the same pid.
 */
int
nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
{
	struct spdk_nvme_ctrlr_process *ctrlr_proc;
	pid_t pid = getpid();

	/* Check whether the process is already added or not */
	if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) {
		return 0;
	}

	/* Initialize the per process properties for this ctrlr */
	ctrlr_proc = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 64, NULL);
	if (ctrlr_proc == NULL) {
		SPDK_ERRLOG("failed to allocate memory to track the process props\n");

		return -1;
	}

	ctrlr_proc->is_primary = spdk_process_is_primary();
	ctrlr_proc->pid = pid;
	STAILQ_INIT(&ctrlr_proc->active_reqs);
	ctrlr_proc->devhandle = devhandle;
	ctrlr_proc->ref = 0;
	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);

	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);

	return 0;
}

/**
 * This function will be called when the process detaches the controller.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
			  struct spdk_nvme_ctrlr_process *proc)
{
	struct spdk_nvme_qpair *qpair, *tmp_qpair;

	assert(STAILQ_EMPTY(&proc->active_reqs));

	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
		spdk_nvme_ctrlr_free_io_qpair(qpair);
	}

	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);

	spdk_dma_free(proc);
}

/**
 * This function will be called when a process has exited unexpectedly,
 * in order to free any incomplete nvme requests, allocated I/O qpairs
 * and allocated memory.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
{
	struct nvme_request *req, *tmp_req;
	struct spdk_nvme_qpair *qpair, *tmp_qpair;

	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);

		assert(req->pid == proc->pid);

		nvme_free_request(req);
	}

	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);

		/*
		 * The process may have been killed while some qpairs were in their
		 * completion context. Clear that flag here to allow these IO
		 * qpairs to be deleted.
		 */
		qpair->in_completion_context = 0;

		qpair->no_deletion_notification_needed = 1;

		spdk_nvme_ctrlr_free_io_qpair(qpair);
	}

	spdk_dma_free(proc);
}

/**
 * This function will be called when destructing the controller.
 *  1. There are no more admin requests on this controller.
 *  2. Clean up any leftover resource allocations from processes that are now gone.
 */
void
nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc, *tmp;

	/* Free all the processes' properties and make sure no pending admin IOs */
	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);

		assert(STAILQ_EMPTY(&active_proc->active_reqs));

		spdk_dma_free(active_proc);
	}
}

/**
 * This function will be called when any other process attaches or
 * detaches the controller in order to cleanup those unexpectedly
 * terminated processes.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static int
nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc, *tmp;
	int active_proc_count = 0;

	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
			SPDK_ERRLOG("process %d terminated unexpectedly\n", active_proc->pid);

			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);

			nvme_ctrlr_cleanup_process(active_proc);
		} else {
			active_proc_count++;
		}
	}

	return active_proc_count;
}

void
nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_remove_inactive_proc(ctrlr);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->ref++;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

void
nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	int proc_count;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->ref--;
		assert(active_proc->ref >= 0);

		/*
		 * The last active process will be removed at the end of
		 * the destruction of the controller.
		 */
		if (active_proc->ref == 0 && proc_count != 1) {
			nvme_ctrlr_remove_process(ctrlr, active_proc);
		}
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

int
nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	int ref = 0;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_remove_inactive_proc(ctrlr);

	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
		ref += active_proc->ref;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return ref;
}

/**
 * Get the PCI device handle which is only visible to its associated process.
 */
struct spdk_pci_device *
nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_pci_device *devhandle = NULL;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		devhandle = active_proc->devhandle;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return devhandle;
}

/**
 * This function will be called repeatedly during initialization until the controller is ready.
 */
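/*
 * The initialization sequence driven here is, in order:
 *   INIT -> (optionally DISABLE_WAIT_FOR_READY_1) -> DISABLE_WAIT_FOR_READY_0
 *        -> ENABLE -> ENABLE_WAIT_FOR_READY_1 -> READY
 * Each call advances at most one step; the caller polls until READY or failure.
 */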
int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	uint32_t ready_timeout_in_ms;
	int rc;

	/*
	 * May need to avoid accessing any register on the target controller
	 * for a while. Return early without touching the FSM.
	 * Check sleep_timeout_tsc > 0 for unit test.
	 */
	if ((ctrlr->sleep_timeout_tsc > 0) &&
	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
		return 0;
	}
	ctrlr->sleep_timeout_tsc = 0;

	if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
	    nvme_ctrlr_get_csts(ctrlr, &csts)) {
		if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
			/* While a device is resetting, it may be unable to service MMIO reads
			 * temporarily. Allow for this case.
			 */
			SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
			goto init_timeout;
		}
		SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr, false);
		return -EIO;
	}

	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;

	/*
	 * Check if the current initialization step is done or has timed out.
	 */
	switch (ctrlr->state) {
	case NVME_CTRLR_STATE_INIT:
		/* Begin the hardware initialization by making sure the controller is disabled. */
		if (cc.bits.en) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
			/*
			 * Controller is currently enabled. We need to disable it to cause a reset.
			 *
			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
			 * Wait for the ready bit to be 1 before disabling the controller.
			 */
			if (csts.bits.rdy == 0) {
				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
				return 0;
			}

			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
			cc.bits.en = 0;
			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
				SPDK_ERRLOG("set_cc() failed\n");
				nvme_ctrlr_fail(ctrlr, false);
				return -EIO;
			}
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);

			/*
			 * Wait 2 seconds before accessing PCI registers.
			 * Not using sleep() to avoid blocking other controllers' initialization.
			 */
			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
				SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2 seconds before reading registers\n");
				ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz();
			}
			return 0;
		} else {
			if (csts.bits.rdy == 1) {
				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
			}

			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
			cc.bits.en = 0;
			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
				SPDK_ERRLOG("set_cc() failed\n");
				nvme_ctrlr_fail(ctrlr, false);
				return -EIO;
			}
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		if (csts.bits.rdy == 0) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
			/*
			 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting
			 * set to 1 if it is too soon after CSTS.RDY is reported as 0.
			 */
			spdk_delay_us(100);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_ENABLE:
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n");
		rc = nvme_ctrlr_enable(ctrlr);
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
		return rc;

	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
			/*
			 * The controller has been enabled.
			 * Perform the rest of initialization in nvme_ctrlr_start() serially.
			 */
			rc = nvme_ctrlr_start(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
			return rc;
		}
		break;

	case NVME_CTRLR_STATE_READY:
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n");
		return 0;

	default:
		assert(0);
		nvme_ctrlr_fail(ctrlr, false);
		return -1;
	}

init_timeout:
	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
	    spdk_get_ticks() > ctrlr->state_timeout_tsc) {
		SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr, false);
		return -1;
	}

	return 0;
}

int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_transport_qpair_reset(ctrlr->adminq);

	nvme_qpair_enable(ctrlr->adminq);

	if (nvme_ctrlr_identify(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
		return -1;
	}

	nvme_ctrlr_set_supported_log_pages(ctrlr);
	nvme_ctrlr_set_supported_features(ctrlr);

	if (ctrlr->cdata.sgls.supported) {
		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
	}

	if (ctrlr->cdata.oacs.doorbell_buffer_config) {
		if (nvme_ctrlr_set_doorbell_buffer_config(ctrlr)) {
			SPDK_WARNLOG("Doorbell buffer config failed\n");
		}
	}

	if (nvme_ctrlr_set_keep_alive_timeout(ctrlr) != 0) {
		SPDK_ERRLOG("Setting keep alive timeout failed\n");
		return -1;
	}

	if (nvme_ctrlr_set_host_id(ctrlr) != 0) {
		return -1;
	}

	return 0;
}

int
nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
{
	pthread_mutexattr_t attr;
	int rc = 0;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
#ifndef __FreeBSD__
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
#endif
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
	return rc;
}

int
nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
	ctrlr->flags = 0;
	ctrlr->free_io_qids = NULL;
	ctrlr->is_resetting = false;
	ctrlr->is_failed = false;

	TAILQ_INIT(&ctrlr->active_io_qpairs);
	STAILQ_INIT(&ctrlr->queued_aborts);
	ctrlr->outstanding_aborts = 0;

	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
	if (rc != 0) {
		return rc;
	}

	TAILQ_INIT(&ctrlr->active_procs);

	return rc;
}

/* This function should be called once at ctrlr initialization to set up constant properties. */
void
nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
		    const union spdk_nvme_vs_register *vs)
{
	ctrlr->cap = *cap;
	ctrlr->vs = *vs;

	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);

	/* For now, always select page_size == min_page_size. */
	ctrlr->page_size = ctrlr->min_page_size;

	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
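	/*
	 * Example (illustrative): if CAP.MQES reports 1023 (a 1024-entry queue,
	 * since MQES is 0-based) and the requested io_queue_size is 4096, the
	 * line above clamps io_queue_size down to 1024.
	 */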

	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
}

void
nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
}

void
nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_qpair *qpair, *tmp;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr);
	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
		spdk_nvme_ctrlr_free_io_qpair(qpair);
	}

	nvme_ctrlr_free_doorbell_buffer(ctrlr);

	nvme_ctrlr_shutdown(ctrlr);

	nvme_ctrlr_destruct_namespaces(ctrlr);

	spdk_bit_array_free(&ctrlr->free_io_qids);

	nvme_transport_ctrlr_destruct(ctrlr);
}

int
nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
				struct nvme_request *req)
{
	return nvme_qpair_submit_request(ctrlr->adminq, req);
}

static void
nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
{
	/* Do nothing */
}

/*
 * Check if we need to send a Keep Alive command.
 * Caller must hold ctrlr->ctrlr_lock.
 */
static void
nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
{
	uint64_t now;
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;
	int rc;

	now = spdk_get_ticks();
	if (now < ctrlr->next_keep_alive_tick) {
		return;
	}

	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
	if (req == NULL) {
		return;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;

	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
	if (rc != 0) {
		SPDK_ERRLOG("Submitting Keep Alive failed\n");
	}

	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
}

int32_t
spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
{
	int32_t num_completions;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	if (ctrlr->keep_alive_interval_ticks) {
		nvme_ctrlr_keep_alive(ctrlr);
	}
	num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return num_completions;
}

const struct spdk_nvme_ctrlr_data *
spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->cdata;
}

union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_csts_register csts;

	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
		csts.raw = 0xFFFFFFFFu;
	}
	return csts;
}

union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap;
}

union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->vs;
}

uint32_t
spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->num_ns;
}

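/*
 * active_ns_list holds the active NSIDs in ascending order, padded with zeroes
 * out to num_ns entries, so a binary search can be used; zero entries are
 * treated as being past the end of the list.
 */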

static int32_t
spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	int32_t result = -1;

	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
		return result;
	}

	int32_t lower = 0;
	int32_t upper = ctrlr->num_ns - 1;
	int32_t mid;

	while (lower <= upper) {
		mid = lower + (upper - lower) / 2;
		if (ctrlr->active_ns_list[mid] == nsid) {
			result = mid;
			break;
		} else {
			if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
				lower = mid + 1;
			} else {
				upper = mid - 1;
			}
		}
	}

	return result;
}

bool
spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
}

uint32_t
spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
}

uint32_t
spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
{
	int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);

	if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
		return ctrlr->active_ns_list[nsid_idx + 1];
	}
	return 0;
}

struct spdk_nvme_ns *
spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	if (nsid < 1 || nsid > ctrlr->num_ns) {
		return NULL;
	}

	return &ctrlr->ns[nsid - 1];
}

struct spdk_pci_device *
spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr == NULL) {
		return NULL;
	}

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		return NULL;
	}

	return nvme_ctrlr_proc_get_devhandle(ctrlr);
}

uint32_t
spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->max_xfer_size;
}

void
spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
				      spdk_nvme_aer_cb aer_cb_fn,
				      void *aer_cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->aer_cb_fn = aer_cb_fn;
		active_proc->aer_cb_arg = aer_cb_arg;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

void
spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t nvme_timeout, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->timeout_ticks = nvme_timeout * spdk_get_ticks_hz();
		active_proc->timeout_cb_fn = cb_fn;
		active_proc->timeout_cb_arg = cb_arg;
	}

	ctrlr->timeout_enabled = true;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

bool
spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
{
	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
	return ctrlr->log_page_supported[log_page];
}
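
/*
 * Illustrative sketch (not part of the driver): the active namespace helpers above are
 * typically combined to walk every active namespace on a controller:
 *
 *	uint32_t nsid;
 *	struct spdk_nvme_ns *ns;
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		// inspect or submit I/O to ns
 *	}
 */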

bool
spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
{
	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
	return ctrlr->feature_supported[feature_code];
}

int
spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	return nvme_ns_construct(ns, nsid, ctrlr);
}

int
spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	/* Inactive NS */
	nvme_ns_destruct(ns);

	return 0;
}

uint32_t
spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	uint32_t nsid;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
	if (res) {
		return 0;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
		return 0;
	}

	nsid = status.cpl.cdw0;
	ns = &ctrlr->ns[nsid - 1];
	/* Inactive NS */
	res = nvme_ns_construct(ns, nsid, ctrlr);
	if (res) {
		return 0;
	}

	/* Return the namespace ID that was created */
	return nsid;
}

int
spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	nvme_ns_destruct(ns);

	return 0;
}
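
/*
 * Illustrative sketch (not part of the driver, error handling omitted): namespace management
 * typically pairs spdk_nvme_ctrlr_create_ns() with spdk_nvme_ctrlr_attach_ns(), attaching the
 * new namespace to this controller's ID. The ns_data values shown are example assumptions:
 *
 *	struct spdk_nvme_ns_data ns_data = {};
 *	struct spdk_nvme_ctrlr_list ctrlr_list = {};
 *	uint32_t new_nsid;
 *
 *	ns_data.nsze = 1024 * 1024;	// namespace size in logical blocks (example value)
 *	ns_data.ncap = 1024 * 1024;	// namespace capacity in logical blocks (example value)
 *	new_nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);
 *
 *	ctrlr_list.ctrlr_count = 1;
 *	ctrlr_list.ctrlr_list[0] = spdk_nvme_ctrlr_get_data(ctrlr)->cntlid;
 *	spdk_nvme_ctrlr_attach_ns(ctrlr, new_nsid, &ctrlr_list);
 */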

int
spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		       struct spdk_nvme_format *format)
{
	struct nvme_completion_poll_status status;
	int res;

	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
				    &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}
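
/*
 * Illustrative sketch (not part of the driver): spdk_nvme_ctrlr_update_firmware() below
 * downloads the image in min_page_size chunks and then issues a Firmware Commit; the image
 * size must be a multiple of 4 bytes. A typical call, where fw_image and fw_size are assumed
 * to be provided by the application:
 *
 *	struct spdk_nvme_status fw_status;
 *	int rc;
 *
 *	rc = spdk_nvme_ctrlr_update_firmware(ctrlr, fw_image, fw_size, 1,
 *					     SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG,
 *					     &fw_status);
 *	if (rc != 0) {
 *		// fw_status holds the controller's status code; a conventional reset
 *		// may be required to activate the new image
 *	}
 */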

int
spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
				int slot, enum spdk_nvme_fw_commit_action commit_action,
				struct spdk_nvme_status *completion_status)
{
	struct spdk_nvme_fw_commit fw_commit;
	struct nvme_completion_poll_status status;
	int res;
	unsigned int size_remaining;
	unsigned int offset;
	unsigned int transfer;
	void *p;

	if (!completion_status) {
		return -EINVAL;
	}
	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
	if (size % 4) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
		return -1;
	}

	/* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG
	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG
	 */
	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid command!\n");
		return -1;
	}

	/* Firmware download */
	size_remaining = size;
	offset = 0;
	p = payload;

	while (size_remaining > 0) {
		transfer = spdk_min(size_remaining, ctrlr->min_page_size);

		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
						       nvme_completion_poll_cb,
						       &status);
		if (res) {
			return res;
		}

		if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
			SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
			return -ENXIO;
		}
		p += transfer;
		offset += transfer;
		size_remaining -= transfer;
	}

	/* Firmware commit */
	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = commit_action;

	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
				       &status);
	if (res) {
		return res;
	}

	res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock);

	memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status));

	if (res) {
		if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
		    status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
			if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
			    status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
				SPDK_NOTICELOG("firmware activation requires a conventional reset to be performed!\n");
			} else {
				SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
			}
			return -ENXIO;
		}
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

void *
spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
{
	void *buf;

	if (size == 0) {
		return NULL;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return buf;
}

void
spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
{
	if (buf && size) {
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	}
}
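
/*
 * Illustrative sketch (not part of the driver): controller memory buffer allocations are
 * returned with the same size that was requested from the allocator above:
 *
 *	void *cmb_buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(ctrlr, 4096);
 *
 *	if (cmb_buf != NULL) {
 *		// use cmb_buf as an I/O payload
 *		spdk_nvme_ctrlr_free_cmb_io_buffer(ctrlr, cmb_buf, 4096);
 *	}
 */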