1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "nvme_internal.h" 37 38 #include "spdk/env.h" 39 #include "spdk/string.h" 40 41 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 42 struct nvme_async_event_request *aer); 43 44 static int 45 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 46 { 47 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 48 &cc->raw); 49 } 50 51 static int 52 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 53 { 54 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 55 &csts->raw); 56 } 57 58 int 59 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 60 { 61 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 62 &cap->raw); 63 } 64 65 int 66 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 67 { 68 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 69 &vs->raw); 70 } 71 72 static int 73 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc) 74 { 75 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 76 cc->raw); 77 } 78 79 void 80 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 81 { 82 char host_id_str[SPDK_UUID_STRING_LEN]; 83 84 assert(opts); 85 86 memset(opts, 0, opts_size); 87 88 #define FIELD_OK(field) \ 89 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 90 91 if (FIELD_OK(num_io_queues)) { 92 opts->num_io_queues = DEFAULT_MAX_IO_QUEUES; 93 } 94 95 if (FIELD_OK(use_cmb_sqs)) { 96 opts->use_cmb_sqs = true; 97 } 98 99 if (FIELD_OK(arb_mechanism)) { 100 opts->arb_mechanism = SPDK_NVME_CC_AMS_RR; 
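	/*
	 * Note on the FIELD_OK()/opts_size pattern used in this function: callers pass
	 * sizeof() of the opts structure they were compiled against, and only the fields
	 * that fit within that size are filled in, which keeps the opts struct extensible
	 * without breaking ABI compatibility. A minimal caller-side sketch (hypothetical
	 * application code, not part of this file):
	 *
	 *   struct spdk_nvme_ctrlr_opts opts;
	 *
	 *   spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
	 *   opts.num_io_queues = 4;             // override selected defaults
	 *   opts.keep_alive_timeout_ms = 5000;
	 *   // then hand &opts and sizeof(opts) to the probe/connect path
	 */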
101 } 102 103 if (FIELD_OK(keep_alive_timeout_ms)) { 104 opts->keep_alive_timeout_ms = 10 * 1000; 105 } 106 107 if (FIELD_OK(io_queue_size)) { 108 opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE; 109 } 110 111 if (FIELD_OK(io_queue_requests)) { 112 opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS; 113 } 114 115 if (FIELD_OK(host_id)) { 116 memset(opts->host_id, 0, sizeof(opts->host_id)); 117 } 118 119 if (FIELD_OK(extended_host_id)) { 120 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 121 sizeof(opts->extended_host_id)); 122 } 123 124 if (FIELD_OK(hostnqn)) { 125 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 126 &g_spdk_nvme_driver->default_extended_host_id); 127 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "2014-08.org.nvmexpress:uuid:%s", host_id_str); 128 } 129 130 if (FIELD_OK(src_addr)) { 131 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 132 } 133 134 if (FIELD_OK(src_svcid)) { 135 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 136 } 137 138 if (FIELD_OK(command_set)) { 139 opts->command_set = SPDK_NVME_CC_CSS_NVM; 140 } 141 #undef FIELD_OK 142 } 143 144 /** 145 * This function will be called when the process allocates the IO qpair. 146 * Note: the ctrlr_lock must be held when calling this function. 147 */ 148 static void 149 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 150 { 151 struct spdk_nvme_ctrlr_process *active_proc; 152 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 153 154 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); 155 if (active_proc) { 156 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 157 qpair->active_proc = active_proc; 158 } 159 } 160 161 /** 162 * This function will be called when the process frees the IO qpair. 163 * Note: the ctrlr_lock must be held when calling this function. 
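 *
 * For reference, the expected call pattern (as used by spdk_nvme_ctrlr_free_io_qpair()
 * below) is:
 *
 *   nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
 *   nvme_ctrlr_proc_remove_io_qpair(qpair);
 *   ...
 *   nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);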
164 */ 165 static void 166 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 167 { 168 struct spdk_nvme_ctrlr_process *active_proc; 169 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 170 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 171 172 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); 173 if (!active_proc) { 174 return; 175 } 176 177 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 178 per_process_tailq, tmp_qpair) { 179 if (active_qpair == qpair) { 180 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 181 active_qpair, per_process_tailq); 182 183 break; 184 } 185 } 186 } 187 188 void 189 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 190 struct spdk_nvme_io_qpair_opts *opts, 191 size_t opts_size) 192 { 193 assert(ctrlr); 194 195 assert(opts); 196 197 memset(opts, 0, opts_size); 198 199 #define FIELD_OK(field) \ 200 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 201 202 if (FIELD_OK(qprio)) { 203 opts->qprio = SPDK_NVME_QPRIO_URGENT; 204 } 205 206 if (FIELD_OK(io_queue_size)) { 207 opts->io_queue_size = ctrlr->opts.io_queue_size; 208 } 209 210 if (FIELD_OK(io_queue_requests)) { 211 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 212 } 213 214 #undef FIELD_OK 215 } 216 217 struct spdk_nvme_qpair * 218 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 219 const struct spdk_nvme_io_qpair_opts *user_opts, 220 size_t opts_size) 221 { 222 uint32_t qid; 223 struct spdk_nvme_qpair *qpair; 224 union spdk_nvme_cc_register cc; 225 struct spdk_nvme_io_qpair_opts opts; 226 227 if (!ctrlr) { 228 return NULL; 229 } 230 231 /* 232 * Get the default options, then overwrite them with the user-provided options 233 * up to opts_size. 234 * 235 * This allows for extensions of the opts structure without breaking 236 * ABI compatibility. 237 */ 238 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 239 if (user_opts) { 240 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 241 } 242 243 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 244 SPDK_ERRLOG("get_cc failed\n"); 245 return NULL; 246 } 247 248 /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */ 249 if ((opts.qprio & 3) != opts.qprio) { 250 return NULL; 251 } 252 253 /* 254 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 255 * default round robin arbitration method. 256 */ 257 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) { 258 SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n"); 259 return NULL; 260 } 261 262 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 263 264 /* 265 * Get the first available I/O queue ID. 
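	 *
	 * Note: spdk_bit_array_find_first_set() returns UINT32_MAX when no bit is set,
	 * so the qid > ctrlr->opts.num_io_queues check below also rejects the case
	 * where no free queue ID is available.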
266 */ 267 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 268 if (qid > ctrlr->opts.num_io_queues) { 269 SPDK_ERRLOG("No free I/O queue IDs\n"); 270 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 271 return NULL; 272 } 273 274 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts); 275 if (qpair == NULL) { 276 SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n"); 277 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 278 return NULL; 279 } 280 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 281 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 282 283 nvme_ctrlr_proc_add_io_qpair(qpair); 284 285 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 286 287 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 288 spdk_delay_us(100); 289 } 290 291 return qpair; 292 } 293 294 int 295 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 296 { 297 struct spdk_nvme_ctrlr *ctrlr; 298 299 if (qpair == NULL) { 300 return 0; 301 } 302 303 ctrlr = qpair->ctrlr; 304 305 if (qpair->in_completion_context) { 306 /* 307 * There are many cases where it is convenient to delete an io qpair in the context 308 * of that qpair's completion routine. To handle this properly, set a flag here 309 * so that the completion routine will perform an actual delete after the context 310 * unwinds. 311 */ 312 qpair->delete_after_completion_context = 1; 313 return 0; 314 } 315 316 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 317 318 nvme_ctrlr_proc_remove_io_qpair(qpair); 319 320 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 321 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 322 323 if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) { 324 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 325 return -1; 326 } 327 328 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 329 return 0; 330 } 331 332 static void 333 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 334 struct spdk_nvme_intel_log_page_directory *log_page_directory) 335 { 336 if (log_page_directory == NULL) { 337 return; 338 } 339 340 if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) { 341 return; 342 } 343 344 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 345 346 if (log_page_directory->read_latency_log_len || 347 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 348 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 349 } 350 if (log_page_directory->write_latency_log_len || 351 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 352 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 353 } 354 if (log_page_directory->temperature_statistics_log_len) { 355 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 356 } 357 if (log_page_directory->smart_log_len) { 358 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 359 } 360 if (log_page_directory->marketing_description_log_len) { 361 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 362 } 363 } 364 365 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 366 { 367 uint64_t phys_addr = 0; 368 struct nvme_completion_poll_status status; 369 struct spdk_nvme_intel_log_page_directory *log_page_directory; 370 371 log_page_directory = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory), 372 64, &phys_addr); 373 if (log_page_directory == NULL) { 374 SPDK_ERRLOG("could not allocate log_page_directory\n"); 375 return -ENXIO; 376 } 377 378 spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 
SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, SPDK_NVME_GLOBAL_NS_TAG, 379 log_page_directory, sizeof(struct spdk_nvme_intel_log_page_directory), 0, 380 nvme_completion_poll_cb, 381 &status); 382 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 383 spdk_dma_free(log_page_directory); 384 SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n"); 385 return -ENXIO; 386 } 387 388 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory); 389 spdk_dma_free(log_page_directory); 390 return 0; 391 } 392 393 static void 394 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 395 { 396 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 397 /* Mandatory pages */ 398 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 399 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 400 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 401 if (ctrlr->cdata.lpa.celp) { 402 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 403 } 404 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 405 nvme_ctrlr_set_intel_support_log_pages(ctrlr); 406 } 407 } 408 409 static void 410 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 411 { 412 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 413 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 414 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 415 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 416 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 417 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 418 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 419 } 420 421 static void 422 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 423 { 424 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 425 /* Mandatory features */ 426 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 427 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 428 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 429 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 430 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 431 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 432 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 433 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 434 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 435 /* Optional features */ 436 if (ctrlr->cdata.vwc.present) { 437 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 438 } 439 if (ctrlr->cdata.apsta.supported) { 440 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 441 } 442 if (ctrlr->cdata.hmpre) { 443 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 444 } 445 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 446 nvme_ctrlr_set_intel_supported_features(ctrlr); 447 } 448 } 449 450 void 451 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 452 { 453 /* 454 * Set the flag here and leave the work failure of qpairs to 455 * spdk_nvme_qpair_process_completions(). 
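	 *
	 * Once is_failed is set, spdk_nvme_ctrlr_reset() below returns immediately
	 * instead of kicking off another reset.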
456 */ 457 if (hot_remove) { 458 ctrlr->is_removed = true; 459 } 460 ctrlr->is_failed = true; 461 SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr); 462 } 463 464 static void 465 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr) 466 { 467 union spdk_nvme_cc_register cc; 468 union spdk_nvme_csts_register csts; 469 uint32_t ms_waited = 0; 470 uint32_t shutdown_timeout_ms; 471 472 if (ctrlr->is_removed) { 473 return; 474 } 475 476 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 477 SPDK_ERRLOG("get_cc() failed\n"); 478 return; 479 } 480 481 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 482 483 if (nvme_ctrlr_set_cc(ctrlr, &cc)) { 484 SPDK_ERRLOG("set_cc() failed\n"); 485 return; 486 } 487 488 /* 489 * The NVMe specification defines RTD3E to be the time between 490 * setting SHN = 1 until the controller will set SHST = 10b. 491 * If the device doesn't report RTD3 entry latency, or if it 492 * reports RTD3 entry latency less than 10 seconds, pick 493 * 10 seconds as a reasonable amount of time to 494 * wait before proceeding. 495 */ 496 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 497 shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000; 498 shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000); 499 SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms); 500 501 do { 502 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 503 SPDK_ERRLOG("get_csts() failed\n"); 504 return; 505 } 506 507 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 508 SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n", 509 ms_waited); 510 return; 511 } 512 513 nvme_delay(1000); 514 ms_waited++; 515 } while (ms_waited < shutdown_timeout_ms); 516 517 SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms); 518 } 519 520 static int 521 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 522 { 523 union spdk_nvme_cc_register cc; 524 int rc; 525 526 rc = nvme_transport_ctrlr_enable(ctrlr); 527 if (rc != 0) { 528 SPDK_ERRLOG("transport ctrlr_enable failed\n"); 529 return rc; 530 } 531 532 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 533 SPDK_ERRLOG("get_cc() failed\n"); 534 return -EIO; 535 } 536 537 if (cc.bits.en != 0) { 538 SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__); 539 return -EINVAL; 540 } 541 542 cc.bits.en = 1; 543 cc.bits.css = 0; 544 cc.bits.shn = 0; 545 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 546 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 547 548 /* Page size is 2 ^ (12 + mps). */ 549 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 550 551 if (ctrlr->cap.bits.css == 0) { 552 SPDK_INFOLOG(SPDK_LOG_NVME, 553 "Drive reports no command sets supported. 
Assuming NVM is supported.\n"); 554 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 555 } 556 557 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 558 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n", 559 ctrlr->opts.command_set, ctrlr->cap.bits.css); 560 return -EINVAL; 561 } 562 563 cc.bits.css = ctrlr->opts.command_set; 564 565 switch (ctrlr->opts.arb_mechanism) { 566 case SPDK_NVME_CC_AMS_RR: 567 break; 568 case SPDK_NVME_CC_AMS_WRR: 569 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 570 break; 571 } 572 return -EINVAL; 573 case SPDK_NVME_CC_AMS_VS: 574 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 575 break; 576 } 577 return -EINVAL; 578 default: 579 return -EINVAL; 580 } 581 582 cc.bits.ams = ctrlr->opts.arb_mechanism; 583 584 if (nvme_ctrlr_set_cc(ctrlr, &cc)) { 585 SPDK_ERRLOG("set_cc() failed\n"); 586 return -EIO; 587 } 588 589 return 0; 590 } 591 592 #ifdef DEBUG 593 static const char * 594 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 595 { 596 switch (state) { 597 case NVME_CTRLR_STATE_INIT: 598 return "init"; 599 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 600 return "disable and wait for CSTS.RDY = 1"; 601 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 602 return "disable and wait for CSTS.RDY = 0"; 603 case NVME_CTRLR_STATE_ENABLE: 604 return "enable controller by writing CC.EN = 1"; 605 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 606 return "wait for CSTS.RDY = 1"; 607 case NVME_CTRLR_STATE_READY: 608 return "ready"; 609 } 610 return "unknown"; 611 }; 612 #endif /* DEBUG */ 613 614 static void 615 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 616 uint64_t timeout_in_ms) 617 { 618 ctrlr->state = state; 619 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 620 SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n", 621 nvme_ctrlr_state_string(ctrlr->state)); 622 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 623 } else { 624 SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n", 625 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 626 ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000; 627 } 628 } 629 630 static void 631 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 632 { 633 if (ctrlr->shadow_doorbell) { 634 spdk_dma_free(ctrlr->shadow_doorbell); 635 ctrlr->shadow_doorbell = NULL; 636 } 637 638 if (ctrlr->eventidx) { 639 spdk_dma_free(ctrlr->eventidx); 640 ctrlr->eventidx = NULL; 641 } 642 } 643 644 static int 645 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 646 { 647 int rc; 648 struct nvme_completion_poll_status status; 649 uint64_t prp1, prp2; 650 651 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 652 return 0; 653 } 654 655 /* only 1 page size for doorbell buffer */ 656 ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, 657 &prp1); 658 if (ctrlr->shadow_doorbell == NULL) { 659 return -1; 660 } 661 662 ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, &prp2); 663 if (ctrlr->eventidx == NULL) { 664 goto error; 665 } 666 667 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 668 nvme_completion_poll_cb, &status); 669 if (rc != 0) { 670 goto error; 671 } 672 673 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 674 goto error; 675 } 676 677 SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n", 678 ctrlr->trid.traddr); 679 680 return 0; 681 682 
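
	/*
	 * The pattern used above - submit an admin command with nvme_completion_poll_cb
	 * and a stack-allocated nvme_completion_poll_status, then spin in
	 * spdk_nvme_wait_for_completion() on the admin queue - is how most synchronous
	 * admin commands in this file are issued. A condensed sketch, where
	 * nvme_ctrlr_cmd_xxx() stands in for any of the admin helpers used here:
	 *
	 *   struct nvme_completion_poll_status status;
	 *
	 *   rc = nvme_ctrlr_cmd_xxx(ctrlr, ..., nvme_completion_poll_cb, &status);
	 *   if (rc != 0) {
	 *           return rc;
	 *   }
	 *   if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
	 *           return -ENXIO;        // the command failed
	 *   }
	 *   // on success, status.cpl holds the completion entry (e.g. cdw0)
	 */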
error: 683 nvme_ctrlr_free_doorbell_buffer(ctrlr); 684 return -1; 685 } 686 687 int 688 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 689 { 690 int rc = 0; 691 struct spdk_nvme_qpair *qpair; 692 struct nvme_request *req, *tmp; 693 694 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 695 696 if (ctrlr->is_resetting || ctrlr->is_failed) { 697 /* 698 * Controller is already resetting or has failed. Return 699 * immediately since there is no need to kick off another 700 * reset in these cases. 701 */ 702 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 703 return 0; 704 } 705 706 ctrlr->is_resetting = true; 707 708 SPDK_NOTICELOG("resetting controller\n"); 709 710 /* Free all of the queued abort requests */ 711 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 712 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 713 nvme_free_request(req); 714 ctrlr->outstanding_aborts--; 715 } 716 717 /* Disable all queues before disabling the controller hardware. */ 718 nvme_qpair_disable(ctrlr->adminq); 719 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 720 nvme_qpair_disable(qpair); 721 } 722 723 /* Doorbell buffer config is invalid during reset */ 724 nvme_ctrlr_free_doorbell_buffer(ctrlr); 725 726 /* Set the state back to INIT to cause a full hardware reset. */ 727 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 728 729 while (ctrlr->state != NVME_CTRLR_STATE_READY) { 730 if (nvme_ctrlr_process_init(ctrlr) != 0) { 731 SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__); 732 nvme_ctrlr_fail(ctrlr, false); 733 rc = -1; 734 break; 735 } 736 } 737 738 if (!ctrlr->is_failed) { 739 /* Reinitialize qpairs */ 740 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 741 if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) { 742 nvme_ctrlr_fail(ctrlr, false); 743 rc = -1; 744 } 745 } 746 } 747 748 ctrlr->is_resetting = false; 749 750 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 751 752 return rc; 753 } 754 755 static int 756 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 757 { 758 struct nvme_completion_poll_status status; 759 int rc; 760 761 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 762 &ctrlr->cdata, sizeof(ctrlr->cdata), 763 nvme_completion_poll_cb, &status); 764 if (rc != 0) { 765 return rc; 766 } 767 768 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 769 SPDK_ERRLOG("nvme_identify_controller failed!\n"); 770 return -ENXIO; 771 } 772 773 /* 774 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 775 * controller supports. 
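	 *
	 * MDTS is expressed as a power of two in units of the minimum memory page size
	 * (CAP.MPSMIN). For example, with a 4 KiB minimum page size and MDTS = 5, the
	 * limit computed below is 4096 * (1 << 5) = 128 KiB.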
	 */
	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
	if (ctrlr->cdata.mdts > 0) {
		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
	}

	return 0;
}


int
nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	int rc;
	uint32_t i;
	uint32_t num_pages;
	uint32_t next_nsid = 0;
	uint32_t *new_ns_list = NULL;


	/*
	 * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list)
	 */
	num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
	new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
				       NULL);
	if (!new_ns_list) {
		SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
		return -ENOMEM;
	}

	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
		/*
		 * Iterate through the pages and fetch each chunk of 1024 namespaces until
		 * there are no more active namespaces
		 */
		for (i = 0; i < num_pages; i++) {
			rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid,
						     &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list),
						     nvme_completion_poll_cb, &status);
			if (rc != 0) {
				goto fail;
			}
			if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
				SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n");
				rc = -ENXIO;
				goto fail;
			}
			next_nsid = new_ns_list[1024 * i + 1023];
			if (next_nsid == 0) {
				/*
				 * No more active namespaces found, no need to fetch additional chunks
				 */
				break;
			}
		}

	} else {
		/*
		 * Controller doesn't support active ns list CNS 0x02 so dummy up
		 * an active ns list
		 */
		for (i = 0; i < ctrlr->num_ns; i++) {
			new_ns_list[i] = i + 1;
		}
	}

	/*
	 * Now that the list is properly set up, we can swap it into the ctrlr and
	 * free up the previous one.
	 */
	spdk_dma_free(ctrlr->active_ns_list);
	ctrlr->active_ns_list = new_ns_list;

	return 0;
fail:
	spdk_dma_free(new_ns_list);
	return rc;
}

static int
nvme_ctrlr_set_num_qpairs(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status status;
	uint32_t cq_allocated, sq_allocated, min_allocated, i;
	int rc;

	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
		SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
			       ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
	} else if (ctrlr->opts.num_io_queues < 1) {
		SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
		ctrlr->opts.num_io_queues = 1;
	}

	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
					   nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
	}

	/* Obtain the number of queues allocated using Get Features.
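	 * The completion's cdw0 packs the allocation into two 0-based 16-bit fields,
	 * so, for example, cdw0 == 0x001F003F means 64 submission queues and
	 * 32 completion queues were allocated (see the decoding below).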
*/ 887 rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_completion_poll_cb, &status); 888 if (rc != 0) { 889 return rc; 890 } 891 892 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 893 SPDK_ERRLOG("Get Features - Number of Queues failed!\n"); 894 ctrlr->opts.num_io_queues = 0; 895 } else { 896 /* 897 * Data in cdw0 is 0-based. 898 * Lower 16-bits indicate number of submission queues allocated. 899 * Upper 16-bits indicate number of completion queues allocated. 900 */ 901 sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1; 902 cq_allocated = (status.cpl.cdw0 >> 16) + 1; 903 904 /* 905 * For 1:1 queue mapping, set number of allocated queues to be minimum of 906 * submission and completion queues. 907 */ 908 min_allocated = spdk_min(sq_allocated, cq_allocated); 909 910 /* Set number of queues to be minimum of requested and actually allocated. */ 911 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 912 } 913 914 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 915 if (ctrlr->free_io_qids == NULL) { 916 return -ENOMEM; 917 } 918 919 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */ 920 spdk_bit_array_clear(ctrlr->free_io_qids, 0); 921 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 922 spdk_bit_array_set(ctrlr->free_io_qids, i); 923 } 924 925 return 0; 926 } 927 928 static int 929 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 930 { 931 struct nvme_completion_poll_status status; 932 uint32_t keep_alive_interval_ms; 933 int rc; 934 935 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 936 return 0; 937 } 938 939 if (ctrlr->cdata.kas == 0) { 940 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n"); 941 ctrlr->opts.keep_alive_timeout_ms = 0; 942 return 0; 943 } 944 945 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 946 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 947 nvme_completion_poll_cb, &status); 948 if (rc != 0) { 949 SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc); 950 ctrlr->opts.keep_alive_timeout_ms = 0; 951 return rc; 952 } 953 954 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 955 SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n", 956 status.cpl.status.sc, status.cpl.status.sct); 957 ctrlr->opts.keep_alive_timeout_ms = 0; 958 return -ENXIO; 959 } 960 961 if (ctrlr->opts.keep_alive_timeout_ms != status.cpl.cdw0) { 962 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n", 963 status.cpl.cdw0); 964 } 965 966 ctrlr->opts.keep_alive_timeout_ms = status.cpl.cdw0; 967 968 keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2; 969 if (keep_alive_interval_ms == 0) { 970 keep_alive_interval_ms = 1; 971 } 972 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms); 973 974 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000); 975 976 /* Schedule the first Keep Alive to be sent as soon as possible. 
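	 * For example, with a controller-reported keep alive timeout of 10000 ms, the
	 * code above sends a Keep Alive roughly every 5000 ms:
	 * keep_alive_interval_ticks = 5000 * spdk_get_ticks_hz() / 1000.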
*/ 977 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 978 979 return 0; 980 } 981 982 static int 983 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 984 { 985 struct nvme_completion_poll_status status; 986 uint8_t *host_id; 987 uint32_t host_id_size; 988 int rc; 989 990 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 991 /* 992 * NVMe-oF sends the host ID during Connect and doesn't allow 993 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 994 */ 995 SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n"); 996 return 0; 997 } 998 999 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 1000 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n"); 1001 host_id = ctrlr->opts.extended_host_id; 1002 host_id_size = sizeof(ctrlr->opts.extended_host_id); 1003 } else { 1004 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n"); 1005 host_id = ctrlr->opts.host_id; 1006 host_id_size = sizeof(ctrlr->opts.host_id); 1007 } 1008 1009 /* If the user specified an all-zeroes host identifier, don't send the command. */ 1010 if (spdk_mem_all_zero(host_id, host_id_size)) { 1011 SPDK_DEBUGLOG(SPDK_LOG_NVME, 1012 "User did not specify host ID - not sending Set Features - Host ID\n"); 1013 return 0; 1014 } 1015 1016 SPDK_TRACEDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size); 1017 1018 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_completion_poll_cb, &status); 1019 if (rc != 0) { 1020 SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc); 1021 return rc; 1022 } 1023 1024 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 1025 SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 1026 status.cpl.status.sc, status.cpl.status.sct); 1027 /* 1028 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 1029 * is optional. 
1030 */ 1031 return 0; 1032 } 1033 1034 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n"); 1035 return 0; 1036 } 1037 1038 static void 1039 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1040 { 1041 if (ctrlr->ns) { 1042 uint32_t i, num_ns = ctrlr->num_ns; 1043 1044 for (i = 0; i < num_ns; i++) { 1045 nvme_ns_destruct(&ctrlr->ns[i]); 1046 } 1047 1048 spdk_dma_free(ctrlr->ns); 1049 ctrlr->ns = NULL; 1050 ctrlr->num_ns = 0; 1051 } 1052 1053 if (ctrlr->nsdata) { 1054 spdk_dma_free(ctrlr->nsdata); 1055 ctrlr->nsdata = NULL; 1056 } 1057 1058 spdk_dma_free(ctrlr->active_ns_list); 1059 ctrlr->active_ns_list = NULL; 1060 } 1061 1062 static int 1063 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1064 { 1065 uint32_t i, nn = ctrlr->cdata.nn; 1066 struct spdk_nvme_ns_data *nsdata; 1067 1068 if (nvme_ctrlr_identify_active_ns(ctrlr)) { 1069 return -1; 1070 } 1071 1072 for (i = 0; i < nn; i++) { 1073 struct spdk_nvme_ns *ns = &ctrlr->ns[i]; 1074 uint32_t nsid = i + 1; 1075 nsdata = &ctrlr->nsdata[nsid - 1]; 1076 1077 if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { 1078 if (nvme_ns_construct(ns, nsid, ctrlr) != 0) { 1079 continue; 1080 } 1081 } 1082 1083 if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { 1084 nvme_ns_destruct(ns); 1085 } 1086 } 1087 1088 return 0; 1089 } 1090 1091 static int 1092 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1093 { 1094 uint32_t nn = ctrlr->cdata.nn; 1095 uint64_t phys_addr = 0; 1096 1097 /* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset), 1098 * so check if we need to reallocate. 1099 */ 1100 if (nn != ctrlr->num_ns) { 1101 nvme_ctrlr_destruct_namespaces(ctrlr); 1102 1103 if (nn == 0) { 1104 SPDK_WARNLOG("controller has 0 namespaces\n"); 1105 return 0; 1106 } 1107 1108 ctrlr->ns = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, 1109 &phys_addr); 1110 if (ctrlr->ns == NULL) { 1111 goto fail; 1112 } 1113 1114 ctrlr->nsdata = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64, 1115 &phys_addr); 1116 if (ctrlr->nsdata == NULL) { 1117 goto fail; 1118 } 1119 1120 ctrlr->num_ns = nn; 1121 } 1122 1123 if (nvme_ctrlr_update_namespaces(ctrlr)) { 1124 goto fail; 1125 } 1126 return 0; 1127 1128 fail: 1129 nvme_ctrlr_destruct_namespaces(ctrlr); 1130 return -1; 1131 } 1132 1133 static void 1134 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 1135 { 1136 struct nvme_async_event_request *aer = arg; 1137 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 1138 struct spdk_nvme_ctrlr_process *active_proc; 1139 union spdk_nvme_async_event_completion event; 1140 1141 if (cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 1142 /* 1143 * This is simulated when controller is being shut down, to 1144 * effectively abort outstanding asynchronous event requests 1145 * and make sure all memory is freed. Do not repost the 1146 * request in this case. 1147 */ 1148 return; 1149 } 1150 1151 event.raw = cpl->cdw0; 1152 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 1153 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 1154 nvme_ctrlr_update_namespaces(ctrlr); 1155 } 1156 1157 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); 1158 if (active_proc && active_proc->aer_cb_fn) { 1159 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 1160 } 1161 1162 /* 1163 * Repost another asynchronous event request to replace the one 1164 * that just completed. 
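	 *
	 * The per-process callback invoked above is registered via
	 * spdk_nvme_ctrlr_register_aer_callback(). A minimal sketch of an application
	 * handler (hypothetical code, not part of this driver):
	 *
	 *   static void
	 *   aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
	 *   {
	 *           union spdk_nvme_async_event_completion event;
	 *
	 *           event.raw = cpl->cdw0;
	 *           printf("AER: type %u, info %u\n",
	 *                  (unsigned int)event.bits.async_event_type,
	 *                  (unsigned int)event.bits.async_event_info);
	 *   }
	 *
	 *   spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, NULL);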
1165 */ 1166 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 1167 /* 1168 * We can't do anything to recover from a failure here, 1169 * so just print a warning message and leave the AER unsubmitted. 1170 */ 1171 SPDK_ERRLOG("resubmitting AER failed!\n"); 1172 } 1173 } 1174 1175 static int 1176 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 1177 struct nvme_async_event_request *aer) 1178 { 1179 struct nvme_request *req; 1180 1181 aer->ctrlr = ctrlr; 1182 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 1183 aer->req = req; 1184 if (req == NULL) { 1185 return -1; 1186 } 1187 1188 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 1189 return nvme_ctrlr_submit_admin_request(ctrlr, req); 1190 } 1191 1192 static int 1193 _nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 1194 { 1195 union spdk_nvme_feat_async_event_configuration config; 1196 struct nvme_completion_poll_status status; 1197 int rc; 1198 1199 config.raw = 0; 1200 config.bits.crit_warn.bits.available_spare = 1; 1201 config.bits.crit_warn.bits.temperature = 1; 1202 config.bits.crit_warn.bits.device_reliability = 1; 1203 config.bits.crit_warn.bits.read_only = 1; 1204 config.bits.crit_warn.bits.volatile_memory_backup = 1; 1205 1206 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 1207 if (ctrlr->cdata.oaes.ns_attribute_notices) { 1208 config.bits.ns_attr_notice = 1; 1209 } 1210 if (ctrlr->cdata.oaes.fw_activation_notices) { 1211 config.bits.fw_activation_notice = 1; 1212 } 1213 } 1214 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 1215 config.bits.telemetry_log_notice = 1; 1216 } 1217 1218 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, nvme_completion_poll_cb, &status); 1219 if (rc != 0) { 1220 return rc; 1221 } 1222 1223 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { 1224 return -ENXIO; 1225 } 1226 1227 return 0; 1228 } 1229 1230 static int 1231 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 1232 { 1233 struct nvme_async_event_request *aer; 1234 uint32_t i; 1235 int rc; 1236 1237 rc = _nvme_ctrlr_configure_aer(ctrlr); 1238 if (rc != 0) { 1239 SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n"); 1240 return 0; 1241 } 1242 1243 /* aerl is a zero-based value, so we need to add 1 here. */ 1244 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 1245 1246 for (i = 0; i < ctrlr->num_aers; i++) { 1247 aer = &ctrlr->aer[i]; 1248 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 1249 SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n"); 1250 return -1; 1251 } 1252 } 1253 1254 return 0; 1255 } 1256 1257 struct spdk_nvme_ctrlr_process * 1258 spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 1259 { 1260 struct spdk_nvme_ctrlr_process *active_proc; 1261 1262 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 1263 if (active_proc->pid == pid) { 1264 return active_proc; 1265 } 1266 } 1267 1268 return NULL; 1269 } 1270 1271 struct spdk_nvme_ctrlr_process * 1272 spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 1273 { 1274 return spdk_nvme_ctrlr_get_process(ctrlr, getpid()); 1275 } 1276 1277 /** 1278 * This function will be called when a process is using the controller. 1279 * 1. For the primary process, it is called when constructing the controller. 1280 * 2. For the secondary process, it is called at probing the controller. 1281 * Note: will check whether the process is already added for the same process. 
1282 */ 1283 int 1284 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 1285 { 1286 struct spdk_nvme_ctrlr_process *ctrlr_proc; 1287 pid_t pid = getpid(); 1288 1289 /* Check whether the process is already added or not */ 1290 if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) { 1291 return 0; 1292 } 1293 1294 /* Initialize the per process properties for this ctrlr */ 1295 ctrlr_proc = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 64, NULL); 1296 if (ctrlr_proc == NULL) { 1297 SPDK_ERRLOG("failed to allocate memory to track the process props\n"); 1298 1299 return -1; 1300 } 1301 1302 ctrlr_proc->is_primary = spdk_process_is_primary(); 1303 ctrlr_proc->pid = pid; 1304 STAILQ_INIT(&ctrlr_proc->active_reqs); 1305 ctrlr_proc->devhandle = devhandle; 1306 ctrlr_proc->ref = 0; 1307 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 1308 1309 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 1310 1311 return 0; 1312 } 1313 1314 /** 1315 * This function will be called when the process detaches the controller. 1316 * Note: the ctrlr_lock must be held when calling this function. 1317 */ 1318 static void 1319 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 1320 struct spdk_nvme_ctrlr_process *proc) 1321 { 1322 struct spdk_nvme_qpair *qpair, *tmp_qpair; 1323 1324 assert(STAILQ_EMPTY(&proc->active_reqs)); 1325 1326 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 1327 spdk_nvme_ctrlr_free_io_qpair(qpair); 1328 } 1329 1330 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 1331 1332 spdk_dma_free(proc); 1333 } 1334 1335 /** 1336 * This function will be called when the process exited unexpectedly 1337 * in order to free any incomplete nvme request, allocated IO qpairs 1338 * and allocated memory. 1339 * Note: the ctrlr_lock must be held when calling this function. 1340 */ 1341 static void 1342 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 1343 { 1344 struct nvme_request *req, *tmp_req; 1345 struct spdk_nvme_qpair *qpair, *tmp_qpair; 1346 1347 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 1348 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 1349 1350 assert(req->pid == proc->pid); 1351 1352 nvme_free_request(req); 1353 } 1354 1355 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 1356 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 1357 1358 /* 1359 * The process may have been killed while some qpairs were in their 1360 * completion context. Clear that flag here to allow these IO 1361 * qpairs to be deleted. 1362 */ 1363 qpair->in_completion_context = 0; 1364 1365 qpair->no_deletion_notification_needed = 1; 1366 1367 spdk_nvme_ctrlr_free_io_qpair(qpair); 1368 } 1369 1370 spdk_dma_free(proc); 1371 } 1372 1373 /** 1374 * This function will be called when destructing the controller. 1375 * 1. There is no more admin request on this controller. 1376 * 2. Clean up any left resource allocation when its associated process is gone. 
 */
void
nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc, *tmp;

	/* Free all the processes' properties and make sure no pending admin IOs */
	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);

		assert(STAILQ_EMPTY(&active_proc->active_reqs));

		spdk_dma_free(active_proc);
	}
}

/**
 * This function will be called when any other process attaches or
 * detaches the controller in order to clean up those unexpectedly
 * terminated processes.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static int
nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc, *tmp;
	int active_proc_count = 0;

	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
			SPDK_ERRLOG("process %d terminated unexpectedly\n", active_proc->pid);

			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);

			nvme_ctrlr_cleanup_process(active_proc);
		} else {
			active_proc_count++;
		}
	}

	return active_proc_count;
}

void
nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_remove_inactive_proc(ctrlr);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->ref++;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

void
nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	int proc_count;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->ref--;
		assert(active_proc->ref >= 0);

		/*
		 * The last active process will be removed at the end of
		 * the destruction of the controller.
		 */
		if (active_proc->ref == 0 && proc_count != 1) {
			nvme_ctrlr_remove_process(ctrlr, active_proc);
		}
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

int
nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	int ref = 0;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_remove_inactive_proc(ctrlr);

	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
		ref += active_proc->ref;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return ref;
}

/**
 * Get the PCI device handle which is only visible to its associated process.
 */
struct spdk_pci_device *
nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_pci_device *devhandle = NULL;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		devhandle = active_proc->devhandle;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return devhandle;
}

/**
 * This function will be called repeatedly during initialization until the controller is ready.
 */
int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	uint32_t ready_timeout_in_ms;
	int rc;

	/*
	 * May need to avoid accessing any register on the target controller
	 * for a while. Return early without touching the FSM.
	 * Check sleep_timeout_tsc > 0 for unit test.
	 */
	if ((ctrlr->sleep_timeout_tsc > 0) &&
	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
		return 0;
	}
	ctrlr->sleep_timeout_tsc = 0;

	if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
	    nvme_ctrlr_get_csts(ctrlr, &csts)) {
		if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
			/* While a device is resetting, it may be unable to service MMIO reads
			 * temporarily. Allow for this case.
			 */
			SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
			goto init_timeout;
		}
		SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr, false);
		return -EIO;
	}

	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;

	/*
	 * Check if the current initialization step is done or has timed out.
	 */
	switch (ctrlr->state) {
	case NVME_CTRLR_STATE_INIT:
		/* Begin the hardware initialization by making sure the controller is disabled. */
		if (cc.bits.en) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
			/*
			 * Controller is currently enabled. We need to disable it to cause a reset.
			 *
			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
			 * Wait for the ready bit to be 1 before disabling the controller.
			 */
			if (csts.bits.rdy == 0) {
				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
				return 0;
			}

			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
			cc.bits.en = 0;
			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
				SPDK_ERRLOG("set_cc() failed\n");
				nvme_ctrlr_fail(ctrlr, false);
				return -EIO;
			}
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);

			/*
			 * Wait 2 seconds before accessing PCI registers.
			 * Not using sleep() to avoid blocking other controllers' initialization.
1575 */ 1576 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 1577 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2 seconds before reading registers\n"); 1578 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz(); 1579 } 1580 return 0; 1581 } else { 1582 if (csts.bits.rdy == 1) { 1583 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n"); 1584 } 1585 1586 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); 1587 return 0; 1588 } 1589 break; 1590 1591 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1592 if (csts.bits.rdy == 1) { 1593 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n"); 1594 /* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */ 1595 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n"); 1596 cc.bits.en = 0; 1597 if (nvme_ctrlr_set_cc(ctrlr, &cc)) { 1598 SPDK_ERRLOG("set_cc() failed\n"); 1599 nvme_ctrlr_fail(ctrlr, false); 1600 return -EIO; 1601 } 1602 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); 1603 return 0; 1604 } 1605 break; 1606 1607 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1608 if (csts.bits.rdy == 0) { 1609 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n"); 1610 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 1611 /* 1612 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 1613 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 1614 */ 1615 spdk_delay_us(100); 1616 return 0; 1617 } 1618 break; 1619 1620 case NVME_CTRLR_STATE_ENABLE: 1621 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n"); 1622 rc = nvme_ctrlr_enable(ctrlr); 1623 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms); 1624 return rc; 1625 1626 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1627 if (csts.bits.rdy == 1) { 1628 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 1629 /* 1630 * The controller has been enabled. 1631 * Perform the rest of initialization in nvme_ctrlr_start() serially. 
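			 *
			 * Callers drive this state machine by repeatedly polling
			 * nvme_ctrlr_process_init() until the READY state is reached, as
			 * spdk_nvme_ctrlr_reset() above does:
			 *
			 *   while (ctrlr->state != NVME_CTRLR_STATE_READY) {
			 *           if (nvme_ctrlr_process_init(ctrlr) != 0) {
			 *                   break;       // initialization failed
			 *           }
			 *   }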
1632 */ 1633 rc = nvme_ctrlr_start(ctrlr); 1634 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 1635 return rc; 1636 } 1637 break; 1638 1639 case NVME_CTRLR_STATE_READY: 1640 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n"); 1641 return 0; 1642 1643 default: 1644 assert(0); 1645 nvme_ctrlr_fail(ctrlr, false); 1646 return -1; 1647 } 1648 1649 init_timeout: 1650 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 1651 spdk_get_ticks() > ctrlr->state_timeout_tsc) { 1652 SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state); 1653 nvme_ctrlr_fail(ctrlr, false); 1654 return -1; 1655 } 1656 1657 return 0; 1658 } 1659 1660 int 1661 nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr) 1662 { 1663 nvme_transport_qpair_reset(ctrlr->adminq); 1664 1665 nvme_qpair_enable(ctrlr->adminq); 1666 1667 if (nvme_ctrlr_identify(ctrlr) != 0) { 1668 return -1; 1669 } 1670 1671 if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { 1672 return -1; 1673 } 1674 1675 if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) { 1676 return -1; 1677 } 1678 1679 if (nvme_ctrlr_configure_aer(ctrlr) != 0) { 1680 return -1; 1681 } 1682 1683 nvme_ctrlr_set_supported_log_pages(ctrlr); 1684 nvme_ctrlr_set_supported_features(ctrlr); 1685 1686 if (ctrlr->cdata.sgls.supported) { 1687 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1688 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1689 } 1690 1691 if (ctrlr->cdata.oacs.doorbell_buffer_config) { 1692 if (nvme_ctrlr_set_doorbell_buffer_config(ctrlr)) { 1693 SPDK_WARNLOG("Doorbell buffer config failed\n"); 1694 } 1695 } 1696 1697 1698 if (nvme_ctrlr_set_keep_alive_timeout(ctrlr) != 0) { 1699 SPDK_ERRLOG("Setting keep alive timeout failed\n"); 1700 return -1; 1701 } 1702 1703 if (nvme_ctrlr_set_host_id(ctrlr) != 0) { 1704 return -1; 1705 } 1706 1707 return 0; 1708 } 1709 1710 int 1711 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 1712 { 1713 pthread_mutexattr_t attr; 1714 int rc = 0; 1715 1716 if (pthread_mutexattr_init(&attr)) { 1717 return -1; 1718 } 1719 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 1720 #ifndef __FreeBSD__ 1721 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 1722 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 1723 #endif 1724 pthread_mutex_init(mtx, &attr)) { 1725 rc = -1; 1726 } 1727 pthread_mutexattr_destroy(&attr); 1728 return rc; 1729 } 1730 1731 int 1732 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 1733 { 1734 int rc; 1735 1736 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1737 ctrlr->flags = 0; 1738 ctrlr->free_io_qids = NULL; 1739 ctrlr->is_resetting = false; 1740 ctrlr->is_failed = false; 1741 1742 TAILQ_INIT(&ctrlr->active_io_qpairs); 1743 STAILQ_INIT(&ctrlr->queued_aborts); 1744 ctrlr->outstanding_aborts = 0; 1745 1746 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 1747 if (rc != 0) { 1748 return rc; 1749 } 1750 1751 TAILQ_INIT(&ctrlr->active_procs); 1752 1753 return rc; 1754 } 1755 1756 /* This function should be called once at ctrlr initialization to set up constant properties. */ 1757 void 1758 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap, 1759 const union spdk_nvme_vs_register *vs) 1760 { 1761 ctrlr->cap = *cap; 1762 ctrlr->vs = *vs; 1763 1764 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 1765 1766 /* For now, always select page_size == min_page_size. 
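	 *
	 * Example: with CAP.MPSMIN = 0, min_page_size above is 1 << (12 + 0) = 4096, so
	 * page_size is 4 KiB and nvme_ctrlr_enable() programs CC.MPS =
	 * spdk_u32log2(4096) - 12 = 0.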
*/ 1767 ctrlr->page_size = ctrlr->min_page_size; 1768 1769 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 1770 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 1771 1772 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 1773 } 1774 1775 void 1776 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 1777 { 1778 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 1779 } 1780 1781 void 1782 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 1783 { 1784 struct spdk_nvme_qpair *qpair, *tmp; 1785 1786 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); 1787 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 1788 spdk_nvme_ctrlr_free_io_qpair(qpair); 1789 } 1790 1791 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1792 1793 nvme_ctrlr_shutdown(ctrlr); 1794 1795 nvme_ctrlr_destruct_namespaces(ctrlr); 1796 1797 spdk_bit_array_free(&ctrlr->free_io_qids); 1798 1799 nvme_transport_ctrlr_destruct(ctrlr); 1800 } 1801 1802 int 1803 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 1804 struct nvme_request *req) 1805 { 1806 return nvme_qpair_submit_request(ctrlr->adminq, req); 1807 } 1808 1809 static void 1810 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 1811 { 1812 /* Do nothing */ 1813 } 1814 1815 /* 1816 * Check if we need to send a Keep Alive command. 1817 * Caller must hold ctrlr->ctrlr_lock. 1818 */ 1819 static void 1820 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 1821 { 1822 uint64_t now; 1823 struct nvme_request *req; 1824 struct spdk_nvme_cmd *cmd; 1825 int rc; 1826 1827 now = spdk_get_ticks(); 1828 if (now < ctrlr->next_keep_alive_tick) { 1829 return; 1830 } 1831 1832 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 1833 if (req == NULL) { 1834 return; 1835 } 1836 1837 cmd = &req->cmd; 1838 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 1839 1840 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 1841 if (rc != 0) { 1842 SPDK_ERRLOG("Submitting Keep Alive failed\n"); 1843 } 1844 1845 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 1846 } 1847 1848 int32_t 1849 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 1850 { 1851 int32_t num_completions; 1852 1853 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1854 if (ctrlr->keep_alive_interval_ticks) { 1855 nvme_ctrlr_keep_alive(ctrlr); 1856 } 1857 num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1858 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1859 1860 return num_completions; 1861 } 1862 1863 const struct spdk_nvme_ctrlr_data * 1864 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 1865 { 1866 return &ctrlr->cdata; 1867 } 1868 1869 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 1870 { 1871 union spdk_nvme_csts_register csts; 1872 1873 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 1874 csts.raw = 0xFFFFFFFFu; 1875 } 1876 return csts; 1877 } 1878 1879 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 1880 { 1881 return ctrlr->cap; 1882 } 1883 1884 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 1885 { 1886 return ctrlr->vs; 1887 } 1888 1889 uint32_t 1890 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 1891 { 1892 return ctrlr->num_ns; 1893 } 1894 1895 static int32_t 1896 spdk_nvme_ctrlr_active_ns_idx(struct 
const struct spdk_nvme_ctrlr_data *
spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->cdata;
}

union spdk_nvme_csts_register
spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_csts_register csts;

	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
		csts.raw = 0xFFFFFFFFu;
	}
	return csts;
}

union spdk_nvme_cap_register
spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap;
}

union spdk_nvme_vs_register
spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->vs;
}

uint32_t
spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->num_ns;
}

static int32_t
spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	int32_t result = -1;

	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
		return result;
	}

	int32_t lower = 0;
	int32_t upper = ctrlr->num_ns - 1;
	int32_t mid;

	/*
	 * Binary search the sorted active_ns_list. Unused tail entries are 0,
	 * so a 0 entry is treated as greater than any valid nsid.
	 */
	while (lower <= upper) {
		mid = lower + (upper - lower) / 2;
		if (ctrlr->active_ns_list[mid] == nsid) {
			result = mid;
			break;
		} else if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
			lower = mid + 1;
		} else {
			upper = mid - 1;
		}
	}

	return result;
}

bool
spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
}

uint32_t
spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
}

uint32_t
spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
{
	int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);

	if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
		return ctrlr->active_ns_list[nsid_idx + 1];
	}
	return 0;
}

struct spdk_nvme_ns *
spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	if (nsid < 1 || nsid > ctrlr->num_ns) {
		return NULL;
	}

	return &ctrlr->ns[nsid - 1];
}
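
/*
 * Usage sketch: the iterator functions above are intended to be used together
 * to walk the active namespace list. Assuming "ctrlr" is an attached
 * controller:
 *
 *	uint32_t nsid;
 *	struct spdk_nvme_ns *ns;
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
 *	     nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		// inspect ns here, e.g. spdk_nvme_ns_get_size(ns)
 *	}
 *
 * A return value of 0 from either iterator means there are no more active
 * namespaces.
 */
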
struct spdk_pci_device *
spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr == NULL) {
		return NULL;
	}

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		return NULL;
	}

	return nvme_ctrlr_proc_get_devhandle(ctrlr);
}

uint32_t
spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->max_xfer_size;
}

void
spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
				      spdk_nvme_aer_cb aer_cb_fn,
				      void *aer_cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->aer_cb_fn = aer_cb_fn;
		active_proc->aer_cb_arg = aer_cb_arg;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

void
spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t nvme_timeout, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->timeout_ticks = nvme_timeout * spdk_get_ticks_hz();
		active_proc->timeout_cb_fn = cb_fn;
		active_proc->timeout_cb_arg = cb_arg;
	}

	ctrlr->timeout_enabled = true;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

bool
spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
{
	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
	return ctrlr->log_page_supported[log_page];
}

bool
spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
{
	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
	return ctrlr->feature_supported[feature_code];
}

int
spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	return nvme_ns_construct(ns, nsid, ctrlr);
}

int
spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	/* Inactive NS */
	nvme_ns_destruct(ns);

	return 0;
}

uint32_t
spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
{
	struct nvme_completion_poll_status status;
	int res;
	uint32_t nsid;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
	if (res) {
		return 0;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
		return 0;
	}

	nsid = status.cpl.cdw0;
	ns = &ctrlr->ns[nsid - 1];
	/* Inactive NS */
	res = nvme_ns_construct(ns, nsid, ctrlr);
	if (res) {
		return 0;
	}

	/* Return the namespace ID that was created */
	return nsid;
}

int
spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct nvme_completion_poll_status status;
	int res;
	struct spdk_nvme_ns *ns;

	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
		return -ENXIO;
	}

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = &ctrlr->ns[nsid - 1];
	nvme_ns_destruct(ns);

	return 0;
}
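
/*
 * Usage sketch for the namespace management calls above (illustrative only;
 * the sizes and the create-then-attach ordering are assumptions made for the
 * example, and error handling is omitted):
 *
 *	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
 *	struct spdk_nvme_ns_data ns_data = {0};
 *	struct spdk_nvme_ctrlr_list ctrlr_list = {0};
 *	uint32_t nsid;
 *
 *	ns_data.nsze = 0x100000;	// namespace size in blocks (example value)
 *	ns_data.ncap = 0x100000;	// namespace capacity in blocks (example value)
 *	nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);	// 0 on failure
 *
 *	ctrlr_list.ctrlr_count = 1;
 *	ctrlr_list.ctrlr[0] = cdata->cntlid;
 *	spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list);	// 0 on success
 */
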
int
spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		       struct spdk_nvme_format *format)
{
	struct nvme_completion_poll_status status;
	int res;

	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
				    &status);
	if (res) {
		return res;
	}
	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

int
spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
				int slot, enum spdk_nvme_fw_commit_action commit_action,
				struct spdk_nvme_status *completion_status)
{
	struct spdk_nvme_fw_commit fw_commit;
	struct nvme_completion_poll_status status;
	int res;
	unsigned int size_remaining;
	unsigned int offset;
	unsigned int transfer;
	void *p;

	if (!completion_status) {
		return -EINVAL;
	}
	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
	if (size % 4) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
		return -1;
	}

	/* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG
	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
	 */
	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid command!\n");
		return -1;
	}

	/* Firmware download */
	size_remaining = size;
	offset = 0;
	p = payload;

	while (size_remaining > 0) {
		transfer = spdk_min(size_remaining, ctrlr->min_page_size);

		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
						       nvme_completion_poll_cb,
						       &status);
		if (res) {
			return res;
		}

		if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
			SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
			return -ENXIO;
		}
		p += transfer;
		offset += transfer;
		size_remaining -= transfer;
	}

	/* Firmware commit */
	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = commit_action;

	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
				       &status);
	if (res) {
		return res;
	}

	res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock);

	memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status));

	if (res) {
		if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
		    status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
			if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
			    status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
				SPDK_NOTICELOG("firmware activation requires conventional reset to be performed!\n");
			} else {
				SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
			}
			return -ENXIO;
		}
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}
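
/*
 * Usage sketch for spdk_nvme_ctrlr_update_firmware() (illustrative; the image
 * buffer, its size and the slot number are placeholders supplied by the
 * caller):
 *
 *	struct spdk_nvme_status fw_status;
 *	int rc;
 *
 *	// fw_image points to a firmware image whose size is a multiple of 4 bytes;
 *	// the third-to-last argument selects the firmware slot to commit to.
 *	rc = spdk_nvme_ctrlr_update_firmware(ctrlr, fw_image, fw_image_size, 1,
 *					     SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG,
 *					     &fw_status);
 *	if (rc != 0) {
 *		// fw_status.sct / fw_status.sc hold the Firmware Commit completion status
 *	}
 *
 * On success the controller is reset to activate the new image; if the device
 * reports that a conventional reset is required instead, this function logs a
 * notice and returns -ENXIO.
 */
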
!\n"); 2240 } else { 2241 SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n"); 2242 } 2243 return -ENXIO; 2244 } 2245 } 2246 2247 return spdk_nvme_ctrlr_reset(ctrlr); 2248 } 2249 2250 void * 2251 spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) 2252 { 2253 void *buf; 2254 2255 if (size == 0) { 2256 return NULL; 2257 } 2258 2259 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 2260 buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size); 2261 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 2262 2263 return buf; 2264 } 2265 2266 void 2267 spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size) 2268 { 2269 if (buf && size) { 2270 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 2271 nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size); 2272 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 2273 } 2274 } 2275