1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2015 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "nvme_internal.h" 10 #include "nvme_io_msg.h" 11 12 #include "spdk/env.h" 13 #include "spdk/string.h" 14 #include "spdk/endian.h" 15 16 struct nvme_active_ns_ctx; 17 18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 19 struct nvme_async_event_request *aer); 20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx); 21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); 22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns); 23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); 24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr); 25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 26 uint64_t timeout_in_ms); 27 28 static int 29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2) 30 { 31 if (ns1->id < ns2->id) { 32 return -1; 33 } else if (ns1->id > ns2->id) { 34 return 1; 35 } else { 36 return 0; 37 } 38 } 39 40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp); 41 42 #define CTRLR_STRING(ctrlr) \ 43 ((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \ 44 ctrlr->trid.subnqn : ctrlr->trid.traddr) 45 46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \ 47 SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 48 49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \ 50 SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 51 52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \ 53 SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 54 55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \ 56 SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 57 58 #ifdef DEBUG 59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \ 60 SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 61 #else 62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0) 63 #endif 64 65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \ 66 nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \ 67 offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg) 68 69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \ 70 nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \ 71 offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg) 72 73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \ 74 nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg) 75 76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \ 77 nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg) 78 79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \ 80 nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg) 81 82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \ 83 nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg) 84 85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \ 86 nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg) 87 88 static int 89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 90 { 91 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 92 &cc->raw); 93 } 94 95 static int 96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 97 { 98 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 99 &csts->raw); 100 } 101 102 int 103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 104 { 105 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 106 &cap->raw); 107 } 108 109 int 110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 111 { 112 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 113 &vs->raw); 114 } 115 116 int 117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz) 118 { 119 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 120 &cmbsz->raw); 121 } 122 123 int 124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap) 125 { 126 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 127 &pmrcap->raw); 128 } 129 130 int 131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo) 132 { 133 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw), 134 &bpinfo->raw); 135 } 136 137 int 138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel) 139 { 140 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw), 141 bprsel->raw); 142 } 143 144 int 145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value) 146 { 147 return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl), 148 bpmbl_value); 149 } 150 151 static int 152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value) 153 { 154 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr), 155 nssr_value); 156 } 157 158 bool 159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr) 160 { 161 return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS && 162 ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS; 163 } 164 165 /* When the field in spdk_nvme_ctrlr_opts are changed and you change this function, please 166 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c 167 */ 168 void 169 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 170 { 171 char host_id_str[SPDK_UUID_STRING_LEN]; 172 173 assert(opts); 174 175 opts->opts_size = opts_size; 176 177 #define FIELD_OK(field) \ 178 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 179 180 #define SET_FIELD(field, value) \ 181 if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \ 182 opts->field = value; \ 183 } \ 184 185 SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES); 186 SET_FIELD(use_cmb_sqs, false); 187 SET_FIELD(no_shn_notification, false); 188 SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR); 189 SET_FIELD(arbitration_burst, 0); 190 SET_FIELD(low_priority_weight, 0); 191 SET_FIELD(medium_priority_weight, 0); 192 SET_FIELD(high_priority_weight, 0); 193 SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS); 194 SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT); 195 SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE); 196 197 if (nvme_driver_init() == 0) { 198 if (FIELD_OK(hostnqn)) { 199 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 200 &g_spdk_nvme_driver->default_extended_host_id); 201 snprintf(opts->hostnqn, sizeof(opts->hostnqn), 202 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str); 203 } 204 205 if (FIELD_OK(extended_host_id)) { 206 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 207 sizeof(opts->extended_host_id)); 208 } 209 210 } 211 212 SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS); 213 214 if (FIELD_OK(src_addr)) { 215 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 216 } 217 218 if (FIELD_OK(src_svcid)) { 219 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 220 } 221 222 if (FIELD_OK(host_id)) { 223 memset(opts->host_id, 0, sizeof(opts->host_id)); 224 } 225 226 SET_FIELD(command_set, CHAR_BIT); 227 SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000); 228 SET_FIELD(header_digest, false); 229 SET_FIELD(data_digest, false); 230 SET_FIELD(disable_error_logging, false); 231 SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT); 232 SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE); 233 SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT); 234 SET_FIELD(disable_read_ana_log_page, false); 235 SET_FIELD(disable_read_changed_ns_list_log_page, false); 236 237 if (FIELD_OK(psk)) { 238 memset(opts->psk, 0, sizeof(opts->psk)); 239 } 240 241 #undef FIELD_OK 242 #undef SET_FIELD 243 } 244 245 const struct spdk_nvme_ctrlr_opts * 246 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) 247 { 248 return &ctrlr->opts; 249 } 250 251 /** 252 * This function will be called when the process allocates the IO qpair. 253 * Note: the ctrlr_lock must be held when calling this function. 254 */ 255 static void 256 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 257 { 258 struct spdk_nvme_ctrlr_process *active_proc; 259 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 260 261 active_proc = nvme_ctrlr_get_current_process(ctrlr); 262 if (active_proc) { 263 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 264 qpair->active_proc = active_proc; 265 } 266 } 267 268 /** 269 * This function will be called when the process frees the IO qpair. 270 * Note: the ctrlr_lock must be held when calling this function. 271 */ 272 static void 273 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 274 { 275 struct spdk_nvme_ctrlr_process *active_proc; 276 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 277 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 278 279 active_proc = nvme_ctrlr_get_current_process(ctrlr); 280 if (!active_proc) { 281 return; 282 } 283 284 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 285 per_process_tailq, tmp_qpair) { 286 if (active_qpair == qpair) { 287 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 288 active_qpair, per_process_tailq); 289 290 break; 291 } 292 } 293 } 294 295 void 296 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 297 struct spdk_nvme_io_qpair_opts *opts, 298 size_t opts_size) 299 { 300 assert(ctrlr); 301 302 assert(opts); 303 304 memset(opts, 0, opts_size); 305 306 #define FIELD_OK(field) \ 307 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 308 309 if (FIELD_OK(qprio)) { 310 opts->qprio = SPDK_NVME_QPRIO_URGENT; 311 } 312 313 if (FIELD_OK(io_queue_size)) { 314 opts->io_queue_size = ctrlr->opts.io_queue_size; 315 } 316 317 if (FIELD_OK(io_queue_requests)) { 318 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 319 } 320 321 if (FIELD_OK(delay_cmd_submit)) { 322 opts->delay_cmd_submit = false; 323 } 324 325 if (FIELD_OK(sq.vaddr)) { 326 opts->sq.vaddr = NULL; 327 } 328 329 if (FIELD_OK(sq.paddr)) { 330 opts->sq.paddr = 0; 331 } 332 333 if (FIELD_OK(sq.buffer_size)) { 334 opts->sq.buffer_size = 0; 335 } 336 337 if (FIELD_OK(cq.vaddr)) { 338 opts->cq.vaddr = NULL; 339 } 340 341 if (FIELD_OK(cq.paddr)) { 342 opts->cq.paddr = 0; 343 } 344 345 if (FIELD_OK(cq.buffer_size)) { 346 opts->cq.buffer_size = 0; 347 } 348 349 if (FIELD_OK(create_only)) { 350 opts->create_only = false; 351 } 352 353 if (FIELD_OK(async_mode)) { 354 opts->async_mode = false; 355 } 356 357 #undef FIELD_OK 358 } 359 360 static struct spdk_nvme_qpair * 361 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 362 const struct spdk_nvme_io_qpair_opts *opts) 363 { 364 int32_t qid; 365 struct spdk_nvme_qpair *qpair; 366 union spdk_nvme_cc_register cc; 367 368 if (!ctrlr) { 369 return NULL; 370 } 371 372 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 373 cc.raw = ctrlr->process_init_cc.raw; 374 375 if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) { 376 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 377 return NULL; 378 } 379 380 /* 381 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 382 * default round robin arbitration method. 383 */ 384 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) { 385 NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n"); 386 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 387 return NULL; 388 } 389 390 qid = spdk_nvme_ctrlr_alloc_qid(ctrlr); 391 if (qid < 0) { 392 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 393 return NULL; 394 } 395 396 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts); 397 if (qpair == NULL) { 398 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n"); 399 spdk_nvme_ctrlr_free_qid(ctrlr, qid); 400 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 401 return NULL; 402 } 403 404 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 405 406 nvme_ctrlr_proc_add_io_qpair(qpair); 407 408 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 409 410 return qpair; 411 } 412 413 int 414 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 415 { 416 int rc; 417 418 if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) { 419 return -EISCONN; 420 } 421 422 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 423 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 424 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 425 426 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 427 spdk_delay_us(100); 428 } 429 430 return rc; 431 } 432 433 void 434 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair) 435 { 436 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 437 438 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 439 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 440 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 441 } 442 443 struct spdk_nvme_qpair * 444 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 445 const struct spdk_nvme_io_qpair_opts *user_opts, 446 size_t opts_size) 447 { 448 449 struct spdk_nvme_qpair *qpair = NULL; 450 struct spdk_nvme_io_qpair_opts opts; 451 int rc; 452 453 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 454 455 if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) { 456 /* When controller is resetting or initializing, free_io_qids is deleted or not created yet. 457 * We can't create IO qpair in that case */ 458 goto unlock; 459 } 460 461 /* 462 * Get the default options, then overwrite them with the user-provided options 463 * up to opts_size. 464 * 465 * This allows for extensions of the opts structure without breaking 466 * ABI compatibility. 467 */ 468 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 469 if (user_opts) { 470 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 471 472 /* If user passes buffers, make sure they're big enough for the requested queue size */ 473 if (opts.sq.vaddr) { 474 if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) { 475 NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n", 476 opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))); 477 goto unlock; 478 } 479 } 480 if (opts.cq.vaddr) { 481 if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) { 482 NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n", 483 opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))); 484 goto unlock; 485 } 486 } 487 } 488 489 qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts); 490 491 if (qpair == NULL || opts.create_only == true) { 492 goto unlock; 493 } 494 495 rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair); 496 if (rc != 0) { 497 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n"); 498 nvme_ctrlr_proc_remove_io_qpair(qpair); 499 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 500 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 501 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 502 qpair = NULL; 503 goto unlock; 504 } 505 506 unlock: 507 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 508 509 return qpair; 510 } 511 512 int 513 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) 514 { 515 struct spdk_nvme_ctrlr *ctrlr; 516 enum nvme_qpair_state qpair_state; 517 int rc; 518 519 assert(qpair != NULL); 520 assert(nvme_qpair_is_admin_queue(qpair) == false); 521 assert(qpair->ctrlr != NULL); 522 523 ctrlr = qpair->ctrlr; 524 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 525 qpair_state = nvme_qpair_get_state(qpair); 526 527 if (ctrlr->is_removed) { 528 rc = -ENODEV; 529 goto out; 530 } 531 532 if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) { 533 rc = -EAGAIN; 534 goto out; 535 } 536 537 if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) { 538 rc = -ENXIO; 539 goto out; 540 } 541 542 if (qpair_state != NVME_QPAIR_DISCONNECTED) { 543 rc = 0; 544 goto out; 545 } 546 547 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 548 if (rc) { 549 rc = -EAGAIN; 550 goto out; 551 } 552 553 out: 554 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 555 return rc; 556 } 557 558 spdk_nvme_qp_failure_reason 559 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr) 560 { 561 return ctrlr->adminq->transport_failure_reason; 562 } 563 564 /* 565 * This internal function will attempt to take the controller 566 * lock before calling disconnect on a controller qpair. 567 * Functions already holding the controller lock should 568 * call nvme_transport_ctrlr_disconnect_qpair directly. 569 */ 570 void 571 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) 572 { 573 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 574 575 assert(ctrlr != NULL); 576 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 577 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 578 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 579 } 580 581 int 582 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 583 { 584 struct spdk_nvme_ctrlr *ctrlr; 585 586 if (qpair == NULL) { 587 return 0; 588 } 589 590 ctrlr = qpair->ctrlr; 591 592 if (qpair->in_completion_context) { 593 /* 594 * There are many cases where it is convenient to delete an io qpair in the context 595 * of that qpair's completion routine. To handle this properly, set a flag here 596 * so that the completion routine will perform an actual delete after the context 597 * unwinds. 598 */ 599 qpair->delete_after_completion_context = 1; 600 return 0; 601 } 602 603 qpair->destroy_in_progress = 1; 604 605 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 606 607 if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) { 608 spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair); 609 } 610 611 /* Do not retry. */ 612 nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING); 613 614 /* In the multi-process case, a process may call this function on a foreign 615 * I/O qpair (i.e. one that this process did not create) when that qpairs process 616 * exits unexpectedly. In that case, we must not try to abort any reqs associated 617 * with that qpair, since the callbacks will also be foreign to this process. 618 */ 619 if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 620 nvme_qpair_abort_all_queued_reqs(qpair); 621 } 622 623 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 624 625 nvme_ctrlr_proc_remove_io_qpair(qpair); 626 627 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 628 spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id); 629 630 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 631 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 632 return 0; 633 } 634 635 static void 636 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 637 struct spdk_nvme_intel_log_page_directory *log_page_directory) 638 { 639 if (log_page_directory == NULL) { 640 return; 641 } 642 643 assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL); 644 645 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 646 647 if (log_page_directory->read_latency_log_len || 648 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 649 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 650 } 651 if (log_page_directory->write_latency_log_len || 652 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 653 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 654 } 655 if (log_page_directory->temperature_statistics_log_len) { 656 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 657 } 658 if (log_page_directory->smart_log_len) { 659 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 660 } 661 if (log_page_directory->marketing_description_log_len) { 662 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 663 } 664 } 665 666 struct intel_log_pages_ctx { 667 struct spdk_nvme_intel_log_page_directory log_page_directory; 668 struct spdk_nvme_ctrlr *ctrlr; 669 }; 670 671 static void 672 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl) 673 { 674 struct intel_log_pages_ctx *ctx = arg; 675 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 676 677 if (!spdk_nvme_cpl_is_error(cpl)) { 678 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory); 679 } 680 681 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 682 ctrlr->opts.admin_timeout_ms); 683 free(ctx); 684 } 685 686 static int 687 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 688 { 689 int rc = 0; 690 struct intel_log_pages_ctx *ctx; 691 692 ctx = calloc(1, sizeof(*ctx)); 693 if (!ctx) { 694 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 695 ctrlr->opts.admin_timeout_ms); 696 return 0; 697 } 698 699 ctx->ctrlr = ctrlr; 700 701 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, 702 SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory, 703 sizeof(struct spdk_nvme_intel_log_page_directory), 704 0, nvme_ctrlr_set_intel_support_log_pages_done, ctx); 705 if (rc != 0) { 706 free(ctx); 707 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 708 ctrlr->opts.admin_timeout_ms); 709 return 0; 710 } 711 712 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES, 713 ctrlr->opts.admin_timeout_ms); 714 715 return 0; 716 } 717 718 static int 719 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 720 { 721 uint32_t ana_log_page_size; 722 723 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid * 724 sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count * 725 sizeof(uint32_t); 726 727 /* Number of active namespaces may have changed. 728 * Check if ANA log page fits into existing buffer. 729 */ 730 if (ana_log_page_size > ctrlr->ana_log_page_size) { 731 void *new_buffer; 732 733 if (ctrlr->ana_log_page) { 734 new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size); 735 } else { 736 new_buffer = calloc(1, ana_log_page_size); 737 } 738 739 if (!new_buffer) { 740 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n", 741 ana_log_page_size); 742 return -ENXIO; 743 } 744 745 ctrlr->ana_log_page = new_buffer; 746 if (ctrlr->copied_ana_desc) { 747 new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size); 748 } else { 749 new_buffer = calloc(1, ana_log_page_size); 750 } 751 752 if (!new_buffer) { 753 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n", 754 ana_log_page_size); 755 return -ENOMEM; 756 } 757 758 ctrlr->copied_ana_desc = new_buffer; 759 ctrlr->ana_log_page_size = ana_log_page_size; 760 } 761 762 return 0; 763 } 764 765 static int 766 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 767 { 768 struct nvme_completion_poll_status *status; 769 int rc; 770 771 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 772 if (rc != 0) { 773 return rc; 774 } 775 776 status = calloc(1, sizeof(*status)); 777 if (status == NULL) { 778 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 779 return -ENOMEM; 780 } 781 782 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 783 SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page, 784 ctrlr->ana_log_page_size, 0, 785 nvme_completion_poll_cb, status); 786 if (rc != 0) { 787 free(status); 788 return rc; 789 } 790 791 if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock, 792 ctrlr->opts.admin_timeout_ms * 1000)) { 793 if (!status->timed_out) { 794 free(status); 795 } 796 return -EIO; 797 } 798 799 free(status); 800 return 0; 801 } 802 803 static int 804 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 805 void *cb_arg) 806 { 807 struct spdk_nvme_ctrlr *ctrlr = cb_arg; 808 struct spdk_nvme_ns *ns; 809 uint32_t i, nsid; 810 811 for (i = 0; i < desc->num_of_nsid; i++) { 812 nsid = desc->nsid[i]; 813 if (nsid == 0 || nsid > ctrlr->cdata.nn) { 814 continue; 815 } 816 817 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 818 assert(ns != NULL); 819 820 ns->ana_group_id = desc->ana_group_id; 821 ns->ana_state = desc->ana_state; 822 } 823 824 return 0; 825 } 826 827 int 828 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, 829 spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg) 830 { 831 struct spdk_nvme_ana_group_descriptor *copied_desc; 832 uint8_t *orig_desc; 833 uint32_t i, desc_size, copy_len; 834 int rc = 0; 835 836 if (ctrlr->ana_log_page == NULL) { 837 return -EINVAL; 838 } 839 840 copied_desc = ctrlr->copied_ana_desc; 841 842 orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page); 843 copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page); 844 845 for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) { 846 memcpy(copied_desc, orig_desc, copy_len); 847 848 rc = cb_fn(copied_desc, cb_arg); 849 if (rc != 0) { 850 break; 851 } 852 853 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) + 854 copied_desc->num_of_nsid * sizeof(uint32_t); 855 orig_desc += desc_size; 856 copy_len -= desc_size; 857 } 858 859 return rc; 860 } 861 862 static int 863 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 864 { 865 int rc = 0; 866 867 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 868 /* Mandatory pages */ 869 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 870 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 871 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 872 if (ctrlr->cdata.lpa.celp) { 873 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 874 } 875 876 if (ctrlr->cdata.cmic.ana_reporting) { 877 ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true; 878 if (!ctrlr->opts.disable_read_ana_log_page) { 879 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 880 if (rc == 0) { 881 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 882 ctrlr); 883 } 884 } 885 } 886 887 if (ctrlr->cdata.ctratt.fdps) { 888 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_CONFIGURATIONS] = true; 889 ctrlr->log_page_supported[SPDK_NVME_LOG_RECLAIM_UNIT_HANDLE_USAGE] = true; 890 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_STATISTICS] = true; 891 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_EVENTS] = true; 892 } 893 894 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && 895 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE && 896 !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { 897 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES, 898 ctrlr->opts.admin_timeout_ms); 899 900 } else { 901 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 902 ctrlr->opts.admin_timeout_ms); 903 904 } 905 906 return rc; 907 } 908 909 static void 910 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 911 { 912 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 913 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 914 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 915 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 916 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 917 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 918 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 919 } 920 921 static void 922 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr) 923 { 924 uint32_t cdw11; 925 struct nvme_completion_poll_status *status; 926 927 if (ctrlr->opts.arbitration_burst == 0) { 928 return; 929 } 930 931 if (ctrlr->opts.arbitration_burst > 7) { 932 NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values is from 0-7\n"); 933 return; 934 } 935 936 status = calloc(1, sizeof(*status)); 937 if (!status) { 938 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 939 return; 940 } 941 942 cdw11 = ctrlr->opts.arbitration_burst; 943 944 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) { 945 cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8; 946 cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16; 947 cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24; 948 } 949 950 if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION, 951 cdw11, 0, NULL, 0, 952 nvme_completion_poll_cb, status) < 0) { 953 NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n"); 954 free(status); 955 return; 956 } 957 958 if (nvme_wait_for_completion_timeout(ctrlr->adminq, status, 959 ctrlr->opts.admin_timeout_ms * 1000)) { 960 NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n"); 961 } 962 963 if (!status->timed_out) { 964 free(status); 965 } 966 } 967 968 static void 969 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 970 { 971 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 972 /* Mandatory features */ 973 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 974 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 975 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 976 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 977 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 978 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 979 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 980 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 981 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 982 /* Optional features */ 983 if (ctrlr->cdata.vwc.present) { 984 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 985 } 986 if (ctrlr->cdata.apsta.supported) { 987 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 988 } 989 if (ctrlr->cdata.hmpre) { 990 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 991 } 992 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 993 nvme_ctrlr_set_intel_supported_features(ctrlr); 994 } 995 996 nvme_ctrlr_set_arbitration_feature(ctrlr); 997 } 998 999 bool 1000 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr) 1001 { 1002 return ctrlr->is_failed; 1003 } 1004 1005 void 1006 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 1007 { 1008 /* 1009 * Set the flag here and leave the work failure of qpairs to 1010 * spdk_nvme_qpair_process_completions(). 1011 */ 1012 if (hot_remove) { 1013 ctrlr->is_removed = true; 1014 } 1015 1016 if (ctrlr->is_failed) { 1017 NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n"); 1018 return; 1019 } 1020 1021 if (ctrlr->is_disconnecting) { 1022 NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n"); 1023 return; 1024 } 1025 1026 ctrlr->is_failed = true; 1027 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1028 NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n"); 1029 } 1030 1031 /** 1032 * This public API function will try to take the controller lock. 1033 * Any private functions being called from a thread already holding 1034 * the ctrlr lock should call nvme_ctrlr_fail directly. 1035 */ 1036 void 1037 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) 1038 { 1039 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1040 nvme_ctrlr_fail(ctrlr, false); 1041 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1042 } 1043 1044 static void 1045 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1046 { 1047 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1048 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1049 1050 if (spdk_nvme_cpl_is_error(cpl)) { 1051 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1052 ctx->shutdown_complete = true; 1053 return; 1054 } 1055 1056 if (ctrlr->opts.no_shn_notification) { 1057 ctx->shutdown_complete = true; 1058 return; 1059 } 1060 1061 /* 1062 * The NVMe specification defines RTD3E to be the time between 1063 * setting SHN = 1 until the controller will set SHST = 10b. 1064 * If the device doesn't report RTD3 entry latency, or if it 1065 * reports RTD3 entry latency less than 10 seconds, pick 1066 * 10 seconds as a reasonable amount of time to 1067 * wait before proceeding. 1068 */ 1069 NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 1070 ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); 1071 ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); 1072 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms); 1073 1074 ctx->shutdown_start_tsc = spdk_get_ticks(); 1075 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1076 } 1077 1078 static void 1079 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1080 { 1081 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1082 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1083 union spdk_nvme_cc_register cc; 1084 int rc; 1085 1086 if (spdk_nvme_cpl_is_error(cpl)) { 1087 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1088 ctx->shutdown_complete = true; 1089 return; 1090 } 1091 1092 assert(value <= UINT32_MAX); 1093 cc.raw = (uint32_t)value; 1094 1095 if (ctrlr->opts.no_shn_notification) { 1096 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 1097 if (cc.bits.en == 0) { 1098 ctx->shutdown_complete = true; 1099 return; 1100 } 1101 1102 cc.bits.en = 0; 1103 } else { 1104 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 1105 } 1106 1107 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx); 1108 if (rc != 0) { 1109 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1110 ctx->shutdown_complete = true; 1111 } 1112 } 1113 1114 static void 1115 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, 1116 struct nvme_ctrlr_detach_ctx *ctx) 1117 { 1118 int rc; 1119 1120 if (ctrlr->is_removed) { 1121 ctx->shutdown_complete = true; 1122 return; 1123 } 1124 1125 if (ctrlr->adminq == NULL || 1126 ctrlr->adminq->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE) { 1127 NVME_CTRLR_INFOLOG(ctrlr, "Adminq is not connected.\n"); 1128 ctx->shutdown_complete = true; 1129 return; 1130 } 1131 1132 ctx->state = NVME_CTRLR_DETACH_SET_CC; 1133 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx); 1134 if (rc != 0) { 1135 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1136 ctx->shutdown_complete = true; 1137 } 1138 } 1139 1140 static void 1141 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1142 { 1143 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1144 1145 if (spdk_nvme_cpl_is_error(cpl)) { 1146 NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n"); 1147 ctx->shutdown_complete = true; 1148 return; 1149 } 1150 1151 assert(value <= UINT32_MAX); 1152 ctx->csts.raw = (uint32_t)value; 1153 ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE; 1154 } 1155 1156 static int 1157 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, 1158 struct nvme_ctrlr_detach_ctx *ctx) 1159 { 1160 union spdk_nvme_csts_register csts; 1161 uint32_t ms_waited; 1162 1163 switch (ctx->state) { 1164 case NVME_CTRLR_DETACH_SET_CC: 1165 case NVME_CTRLR_DETACH_GET_CSTS: 1166 /* We're still waiting for the register operation to complete */ 1167 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1168 return -EAGAIN; 1169 1170 case NVME_CTRLR_DETACH_CHECK_CSTS: 1171 ctx->state = NVME_CTRLR_DETACH_GET_CSTS; 1172 if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) { 1173 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 1174 return -EIO; 1175 } 1176 return -EAGAIN; 1177 1178 case NVME_CTRLR_DETACH_GET_CSTS_DONE: 1179 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1180 break; 1181 1182 default: 1183 assert(0 && "Should never happen"); 1184 return -EINVAL; 1185 } 1186 1187 ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); 1188 csts.raw = ctx->csts.raw; 1189 1190 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 1191 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited); 1192 return 0; 1193 } 1194 1195 if (ms_waited < ctx->shutdown_timeout_ms) { 1196 return -EAGAIN; 1197 } 1198 1199 NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n", 1200 ctx->shutdown_timeout_ms); 1201 if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { 1202 NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); 1203 } 1204 1205 return 0; 1206 } 1207 1208 static inline uint64_t 1209 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr) 1210 { 1211 return ctrlr->cap.bits.to * 500; 1212 } 1213 1214 static void 1215 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1216 { 1217 struct spdk_nvme_ctrlr *ctrlr = ctx; 1218 1219 if (spdk_nvme_cpl_is_error(cpl)) { 1220 NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n"); 1221 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1222 return; 1223 } 1224 1225 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 1226 nvme_ctrlr_get_ready_timeout(ctrlr)); 1227 } 1228 1229 static int 1230 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 1231 { 1232 union spdk_nvme_cc_register cc; 1233 int rc; 1234 1235 rc = nvme_transport_ctrlr_enable(ctrlr); 1236 if (rc != 0) { 1237 NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n"); 1238 return rc; 1239 } 1240 1241 cc.raw = ctrlr->process_init_cc.raw; 1242 if (cc.bits.en != 0) { 1243 NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n"); 1244 return -EINVAL; 1245 } 1246 1247 cc.bits.en = 1; 1248 cc.bits.css = 0; 1249 cc.bits.shn = 0; 1250 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 1251 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 1252 1253 /* Page size is 2 ^ (12 + mps). */ 1254 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 1255 1256 /* 1257 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS. 1258 * A controller that does not have any bit set in CAP.CSS is not spec compliant. 1259 * Try to support such a controller regardless. 1260 */ 1261 if (ctrlr->cap.bits.css == 0) { 1262 NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n"); 1263 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 1264 } 1265 1266 /* 1267 * If the user did not explicitly request a command set, or supplied a value larger than 1268 * what can be saved in CC.CSS, use the most reasonable default. 1269 */ 1270 if (ctrlr->opts.command_set >= CHAR_BIT) { 1271 if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) { 1272 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS; 1273 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) { 1274 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1275 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) { 1276 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO; 1277 } else { 1278 /* Invalid supported bits detected, falling back to NVM. */ 1279 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1280 } 1281 } 1282 1283 /* Verify that the selected command set is supported by the controller. */ 1284 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 1285 NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n", 1286 ctrlr->opts.command_set, ctrlr->cap.bits.css); 1287 NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n"); 1288 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1289 } 1290 1291 cc.bits.css = ctrlr->opts.command_set; 1292 1293 switch (ctrlr->opts.arb_mechanism) { 1294 case SPDK_NVME_CC_AMS_RR: 1295 break; 1296 case SPDK_NVME_CC_AMS_WRR: 1297 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 1298 break; 1299 } 1300 return -EINVAL; 1301 case SPDK_NVME_CC_AMS_VS: 1302 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 1303 break; 1304 } 1305 return -EINVAL; 1306 default: 1307 return -EINVAL; 1308 } 1309 1310 cc.bits.ams = ctrlr->opts.arb_mechanism; 1311 ctrlr->process_init_cc.raw = cc.raw; 1312 1313 if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) { 1314 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 1315 return -EIO; 1316 } 1317 1318 return 0; 1319 } 1320 1321 static const char * 1322 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 1323 { 1324 switch (state) { 1325 case NVME_CTRLR_STATE_INIT_DELAY: 1326 return "delay init"; 1327 case NVME_CTRLR_STATE_CONNECT_ADMINQ: 1328 return "connect adminq"; 1329 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 1330 return "wait for connect adminq"; 1331 case NVME_CTRLR_STATE_READ_VS: 1332 return "read vs"; 1333 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 1334 return "read vs wait for vs"; 1335 case NVME_CTRLR_STATE_READ_CAP: 1336 return "read cap"; 1337 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 1338 return "read cap wait for cap"; 1339 case NVME_CTRLR_STATE_CHECK_EN: 1340 return "check en"; 1341 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 1342 return "check en wait for cc"; 1343 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1344 return "disable and wait for CSTS.RDY = 1"; 1345 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1346 return "disable and wait for CSTS.RDY = 1 reg"; 1347 case NVME_CTRLR_STATE_SET_EN_0: 1348 return "set CC.EN = 0"; 1349 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 1350 return "set CC.EN = 0 wait for cc"; 1351 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1352 return "disable and wait for CSTS.RDY = 0"; 1353 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 1354 return "disable and wait for CSTS.RDY = 0 reg"; 1355 case NVME_CTRLR_STATE_DISABLED: 1356 return "controller is disabled"; 1357 case NVME_CTRLR_STATE_ENABLE: 1358 return "enable controller by writing CC.EN = 1"; 1359 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 1360 return "enable controller by writing CC.EN = 1 reg"; 1361 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1362 return "wait for CSTS.RDY = 1"; 1363 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1364 return "wait for CSTS.RDY = 1 reg"; 1365 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 1366 return "reset admin queue"; 1367 case NVME_CTRLR_STATE_IDENTIFY: 1368 return "identify controller"; 1369 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 1370 return "wait for identify controller"; 1371 case NVME_CTRLR_STATE_CONFIGURE_AER: 1372 return "configure AER"; 1373 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 1374 return "wait for configure aer"; 1375 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 1376 return "set keep alive timeout"; 1377 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 1378 return "wait for set keep alive timeout"; 1379 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 1380 return "identify controller iocs specific"; 1381 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 1382 return "wait for identify controller iocs specific"; 1383 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 1384 return "get zns cmd and effects log page"; 1385 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 1386 return "wait for get zns cmd and effects log page"; 1387 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 1388 return "set number of queues"; 1389 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 1390 return "wait for set number of queues"; 1391 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 1392 return "identify active ns"; 1393 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 1394 return "wait for identify active ns"; 1395 case NVME_CTRLR_STATE_IDENTIFY_NS: 1396 return "identify ns"; 1397 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 1398 return "wait for identify ns"; 1399 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 1400 return "identify namespace id descriptors"; 1401 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 1402 return "wait for identify namespace id descriptors"; 1403 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 1404 return "identify ns iocs specific"; 1405 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 1406 return "wait for identify ns iocs specific"; 1407 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 1408 return "set supported log pages"; 1409 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 1410 return "set supported INTEL log pages"; 1411 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 1412 return "wait for supported INTEL log pages"; 1413 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 1414 return "set supported features"; 1415 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 1416 return "set doorbell buffer config"; 1417 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 1418 return "wait for doorbell buffer config"; 1419 case NVME_CTRLR_STATE_SET_HOST_ID: 1420 return "set host ID"; 1421 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 1422 return "wait for set host ID"; 1423 case NVME_CTRLR_STATE_TRANSPORT_READY: 1424 return "transport ready"; 1425 case NVME_CTRLR_STATE_READY: 1426 return "ready"; 1427 case NVME_CTRLR_STATE_ERROR: 1428 return "error"; 1429 case NVME_CTRLR_STATE_DISCONNECTED: 1430 return "disconnected"; 1431 } 1432 return "unknown"; 1433 }; 1434 1435 static void 1436 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1437 uint64_t timeout_in_ms, bool quiet) 1438 { 1439 uint64_t ticks_per_ms, timeout_in_ticks, now_ticks; 1440 1441 ctrlr->state = state; 1442 if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) { 1443 if (!quiet) { 1444 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n", 1445 nvme_ctrlr_state_string(ctrlr->state)); 1446 } 1447 return; 1448 } 1449 1450 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 1451 goto inf; 1452 } 1453 1454 ticks_per_ms = spdk_get_ticks_hz() / 1000; 1455 if (timeout_in_ms > UINT64_MAX / ticks_per_ms) { 1456 NVME_CTRLR_ERRLOG(ctrlr, 1457 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1458 goto inf; 1459 } 1460 1461 now_ticks = spdk_get_ticks(); 1462 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 1463 if (timeout_in_ticks > UINT64_MAX - now_ticks) { 1464 NVME_CTRLR_ERRLOG(ctrlr, 1465 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1466 goto inf; 1467 } 1468 1469 ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; 1470 if (!quiet) { 1471 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n", 1472 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 1473 } 1474 return; 1475 inf: 1476 if (!quiet) { 1477 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n", 1478 nvme_ctrlr_state_string(ctrlr->state)); 1479 } 1480 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 1481 } 1482 1483 static void 1484 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1485 uint64_t timeout_in_ms) 1486 { 1487 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false); 1488 } 1489 1490 static void 1491 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1492 uint64_t timeout_in_ms) 1493 { 1494 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true); 1495 } 1496 1497 static void 1498 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1499 { 1500 spdk_free(ctrlr->cdata_zns); 1501 ctrlr->cdata_zns = NULL; 1502 } 1503 1504 static void 1505 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1506 { 1507 nvme_ctrlr_free_zns_specific_data(ctrlr); 1508 } 1509 1510 static void 1511 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 1512 { 1513 if (ctrlr->shadow_doorbell) { 1514 spdk_free(ctrlr->shadow_doorbell); 1515 ctrlr->shadow_doorbell = NULL; 1516 } 1517 1518 if (ctrlr->eventidx) { 1519 spdk_free(ctrlr->eventidx); 1520 ctrlr->eventidx = NULL; 1521 } 1522 } 1523 1524 static void 1525 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) 1526 { 1527 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1528 1529 if (spdk_nvme_cpl_is_error(cpl)) { 1530 NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n"); 1531 } else { 1532 NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n"); 1533 } 1534 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1535 ctrlr->opts.admin_timeout_ms); 1536 } 1537 1538 static int 1539 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 1540 { 1541 int rc = 0; 1542 uint64_t prp1, prp2, len; 1543 1544 if (!ctrlr->cdata.oacs.doorbell_buffer_config) { 1545 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1546 ctrlr->opts.admin_timeout_ms); 1547 return 0; 1548 } 1549 1550 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 1551 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1552 ctrlr->opts.admin_timeout_ms); 1553 return 0; 1554 } 1555 1556 /* only 1 page size for doorbell buffer */ 1557 ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1558 NULL, SPDK_ENV_LCORE_ID_ANY, 1559 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1560 if (ctrlr->shadow_doorbell == NULL) { 1561 rc = -ENOMEM; 1562 goto error; 1563 } 1564 1565 len = ctrlr->page_size; 1566 prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len); 1567 if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1568 rc = -EFAULT; 1569 goto error; 1570 } 1571 1572 ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1573 NULL, SPDK_ENV_LCORE_ID_ANY, 1574 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1575 if (ctrlr->eventidx == NULL) { 1576 rc = -ENOMEM; 1577 goto error; 1578 } 1579 1580 len = ctrlr->page_size; 1581 prp2 = spdk_vtophys(ctrlr->eventidx, &len); 1582 if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1583 rc = -EFAULT; 1584 goto error; 1585 } 1586 1587 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, 1588 ctrlr->opts.admin_timeout_ms); 1589 1590 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 1591 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); 1592 if (rc != 0) { 1593 goto error; 1594 } 1595 1596 return 0; 1597 1598 error: 1599 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1600 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1601 return rc; 1602 } 1603 1604 void 1605 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) 1606 { 1607 struct nvme_request *req, *tmp; 1608 struct spdk_nvme_cpl cpl = {}; 1609 1610 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1611 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1612 1613 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 1614 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 1615 ctrlr->outstanding_aborts++; 1616 1617 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1618 nvme_free_request(req); 1619 } 1620 } 1621 1622 static int 1623 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1624 { 1625 if (ctrlr->is_resetting || ctrlr->is_removed) { 1626 /* 1627 * Controller is already resetting or has been removed. Return 1628 * immediately since there is no need to kick off another 1629 * reset in these cases. 1630 */ 1631 return ctrlr->is_resetting ? -EBUSY : -ENXIO; 1632 } 1633 1634 ctrlr->is_resetting = true; 1635 ctrlr->is_failed = false; 1636 ctrlr->is_disconnecting = true; 1637 ctrlr->prepare_for_reset = true; 1638 1639 NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n"); 1640 1641 /* Disable keep-alive, it'll be re-enabled as part of the init process */ 1642 ctrlr->keep_alive_interval_ticks = 0; 1643 1644 /* Abort all of the queued abort requests */ 1645 nvme_ctrlr_abort_queued_aborts(ctrlr); 1646 1647 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 1648 1649 ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1650 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1651 1652 return 0; 1653 } 1654 1655 static void 1656 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr) 1657 { 1658 assert(ctrlr->is_failed == false); 1659 ctrlr->is_disconnecting = false; 1660 1661 /* Doorbell buffer config is invalid during reset */ 1662 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1663 1664 /* I/O Command Set Specific Identify Controller data is invalidated during reset */ 1665 nvme_ctrlr_free_iocs_specific_data(ctrlr); 1666 1667 spdk_bit_array_free(&ctrlr->free_io_qids); 1668 1669 /* Set the state back to DISCONNECTED to cause a full hardware reset. */ 1670 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISCONNECTED, NVME_TIMEOUT_INFINITE); 1671 } 1672 1673 int 1674 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1675 { 1676 int rc; 1677 1678 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1679 rc = nvme_ctrlr_disconnect(ctrlr); 1680 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1681 1682 return rc; 1683 } 1684 1685 void 1686 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) 1687 { 1688 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1689 1690 ctrlr->prepare_for_reset = false; 1691 1692 /* Set the state back to INIT to cause a full hardware reset. */ 1693 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1694 1695 /* Return without releasing ctrlr_lock. ctrlr_lock will be released when 1696 * spdk_nvme_ctrlr_reset_poll_async() returns 0. 1697 */ 1698 } 1699 1700 /** 1701 * This function will be called when the controller is being reinitialized. 1702 * Note: the ctrlr_lock must be held when calling this function. 1703 */ 1704 int 1705 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) 1706 { 1707 struct spdk_nvme_ns *ns, *tmp_ns; 1708 struct spdk_nvme_qpair *qpair; 1709 int rc = 0, rc_tmp = 0; 1710 bool async; 1711 1712 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1713 NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n"); 1714 rc = -1; 1715 } 1716 if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) { 1717 return -EAGAIN; 1718 } 1719 1720 /* 1721 * For non-fabrics controllers, the memory locations of the transport qpair 1722 * don't change when the controller is reset. They simply need to be 1723 * re-enabled with admin commands to the controller. For fabric 1724 * controllers we need to disconnect and reconnect the qpair on its 1725 * own thread outside of the context of the reset. 1726 */ 1727 if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 1728 /* Reinitialize qpairs */ 1729 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1730 assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id)); 1731 spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id); 1732 1733 /* Force a synchronous connect. We can't currently handle an asynchronous 1734 * operation here. */ 1735 async = qpair->async; 1736 qpair->async = false; 1737 rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 1738 qpair->async = async; 1739 1740 if (rc_tmp != 0) { 1741 rc = rc_tmp; 1742 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1743 continue; 1744 } 1745 } 1746 } 1747 1748 /* 1749 * Take this opportunity to remove inactive namespaces. During a reset namespace 1750 * handles can be invalidated. 1751 */ 1752 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 1753 if (!ns->active) { 1754 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 1755 spdk_free(ns); 1756 } 1757 } 1758 1759 if (rc) { 1760 nvme_ctrlr_fail(ctrlr, false); 1761 } 1762 ctrlr->is_resetting = false; 1763 1764 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1765 1766 if (!ctrlr->cdata.oaes.ns_attribute_notices) { 1767 /* 1768 * If controller doesn't support ns_attribute_notices and 1769 * namespace attributes change (e.g. number of namespaces) 1770 * we need to update system handling device reset. 1771 */ 1772 nvme_io_msg_ctrlr_update(ctrlr); 1773 } 1774 1775 return rc; 1776 } 1777 1778 /* 1779 * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset 1780 * (Change CC.EN from 1 to 0) as a operation to disconnect the admin qpair. 1781 * The following two functions are added to do a Controller Level Reset. They have 1782 * to be called under the nvme controller's lock. 1783 */ 1784 void 1785 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) 1786 { 1787 assert(ctrlr->is_disconnecting == true); 1788 1789 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 1790 } 1791 1792 int 1793 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr) 1794 { 1795 int rc = 0; 1796 1797 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1798 NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n"); 1799 rc = -1; 1800 } 1801 1802 if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) { 1803 return -EAGAIN; 1804 } 1805 1806 return rc; 1807 } 1808 1809 static void 1810 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr) 1811 { 1812 struct spdk_nvme_qpair *qpair; 1813 1814 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1815 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1816 } 1817 } 1818 1819 int 1820 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1821 { 1822 int rc; 1823 1824 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1825 1826 rc = nvme_ctrlr_disconnect(ctrlr); 1827 if (rc == 0) { 1828 nvme_ctrlr_fail_io_qpairs(ctrlr); 1829 } 1830 1831 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1832 1833 if (rc != 0) { 1834 if (rc == -EBUSY) { 1835 rc = 0; 1836 } 1837 return rc; 1838 } 1839 1840 while (1) { 1841 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1842 if (rc == -ENXIO) { 1843 break; 1844 } 1845 } 1846 1847 spdk_nvme_ctrlr_reconnect_async(ctrlr); 1848 1849 while (true) { 1850 rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1851 if (rc != -EAGAIN) { 1852 break; 1853 } 1854 } 1855 1856 return rc; 1857 } 1858 1859 int 1860 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1861 { 1862 union spdk_nvme_cap_register cap; 1863 int rc = 0; 1864 1865 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1866 if (cap.bits.nssrs == 0) { 1867 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1868 return -ENOTSUP; 1869 } 1870 1871 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1872 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1873 ctrlr->is_resetting = true; 1874 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1875 ctrlr->is_resetting = false; 1876 1877 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1878 /* 1879 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1880 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 1881 */ 1882 return rc; 1883 } 1884 1885 int 1886 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1887 { 1888 int rc = 0; 1889 1890 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1891 1892 if (ctrlr->is_failed == false) { 1893 rc = -EPERM; 1894 goto out; 1895 } 1896 1897 if (trid->trtype != ctrlr->trid.trtype) { 1898 rc = -EINVAL; 1899 goto out; 1900 } 1901 1902 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1903 rc = -EINVAL; 1904 goto out; 1905 } 1906 1907 ctrlr->trid = *trid; 1908 1909 out: 1910 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1911 return rc; 1912 } 1913 1914 void 1915 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1916 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1917 { 1918 if (!spdk_process_is_primary()) { 1919 return; 1920 } 1921 1922 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1923 ctrlr->remove_cb = remove_cb; 1924 ctrlr->cb_ctx = remove_ctx; 1925 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1926 } 1927 1928 static void 1929 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1930 { 1931 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1932 1933 if (spdk_nvme_cpl_is_error(cpl)) { 1934 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1935 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1936 return; 1937 } 1938 1939 /* 1940 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1941 * controller supports. 1942 */ 1943 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1944 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1945 if (ctrlr->cdata.mdts > 0) { 1946 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1947 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1948 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1949 } 1950 1951 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1952 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1953 ctrlr->cntlid = ctrlr->cdata.cntlid; 1954 } else { 1955 /* 1956 * Fabrics controllers should already have CNTLID from the Connect command. 1957 * 1958 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1959 * trust the one from Connect. 1960 */ 1961 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1962 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1963 ctrlr->cdata.cntlid, ctrlr->cntlid); 1964 } 1965 } 1966 1967 if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1968 assert(ctrlr->cdata.sgls.supported != 0x3); 1969 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1970 if (ctrlr->cdata.sgls.supported == 0x2) { 1971 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1972 } 1973 1974 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1975 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1976 } 1977 1978 if (ctrlr->cdata.sgls.metadata_address && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1979 ctrlr->flags |= SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED; 1980 } 1981 1982 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1983 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1984 } 1985 1986 if (ctrlr->cdata.oacs.directives) { 1987 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 1988 } 1989 1990 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 1991 ctrlr->cdata.fuses.compare_and_write); 1992 if (ctrlr->cdata.fuses.compare_and_write) { 1993 ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 1994 } 1995 1996 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 1997 ctrlr->opts.admin_timeout_ms); 1998 } 1999 2000 static int 2001 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 2002 { 2003 int rc; 2004 2005 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 2006 ctrlr->opts.admin_timeout_ms); 2007 2008 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 2009 &ctrlr->cdata, sizeof(ctrlr->cdata), 2010 nvme_ctrlr_identify_done, ctrlr); 2011 if (rc != 0) { 2012 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2013 return rc; 2014 } 2015 2016 return 0; 2017 } 2018 2019 static void 2020 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2021 { 2022 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2023 struct spdk_nvme_ctrlr *ctrlr = arg; 2024 2025 if (spdk_nvme_cpl_is_error(cpl)) { 2026 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2027 spdk_free(ctrlr->tmp_ptr); 2028 ctrlr->tmp_ptr = NULL; 2029 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2030 return; 2031 } 2032 2033 log_page = ctrlr->tmp_ptr; 2034 2035 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2036 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2037 } 2038 spdk_free(ctrlr->tmp_ptr); 2039 ctrlr->tmp_ptr = NULL; 2040 2041 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2042 } 2043 2044 static int 2045 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2046 { 2047 int rc; 2048 2049 assert(!ctrlr->tmp_ptr); 2050 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2051 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2052 if (!ctrlr->tmp_ptr) { 2053 rc = -ENOMEM; 2054 goto error; 2055 } 2056 2057 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2058 ctrlr->opts.admin_timeout_ms); 2059 2060 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2061 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2062 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2063 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2064 if (rc != 0) { 2065 goto error; 2066 } 2067 2068 return 0; 2069 2070 error: 2071 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2072 spdk_free(ctrlr->tmp_ptr); 2073 ctrlr->tmp_ptr = NULL; 2074 return rc; 2075 } 2076 2077 static void 2078 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl) 2079 { 2080 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2081 2082 if (spdk_nvme_cpl_is_error(cpl)) { 2083 /* no need to print an error, the controller simply does not support ZNS */ 2084 nvme_ctrlr_free_zns_specific_data(ctrlr); 2085 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2086 ctrlr->opts.admin_timeout_ms); 2087 return; 2088 } 2089 2090 /* A zero zasl value means use mdts */ 2091 if (ctrlr->cdata_zns->zasl) { 2092 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl); 2093 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append); 2094 } else { 2095 ctrlr->max_zone_append_size = ctrlr->max_xfer_size; 2096 } 2097 2098 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG, 2099 ctrlr->opts.admin_timeout_ms); 2100 } 2101 2102 /** 2103 * This function will try to fetch the I/O Command Specific Controller data structure for 2104 * each I/O Command Set supported by SPDK. 2105 * 2106 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command" 2107 * will be returned. Since we are fetching in a exploratively way, getting an error back 2108 * from the controller should not be treated as fatal. 2109 * 2110 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set). 2111 * 2112 * I/O Command Sets without a IOCS specific data structure (i.e. a zero-filled IOCS specific 2113 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set). 2114 */ 2115 static int 2116 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2117 { 2118 int rc; 2119 2120 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2121 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2122 ctrlr->opts.admin_timeout_ms); 2123 return 0; 2124 } 2125 2126 /* 2127 * Since SPDK currently only needs to fetch a single Command Set, keep the code here, 2128 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates, 2129 * which would require additional functions and complexity for no good reason. 2130 */ 2131 assert(!ctrlr->cdata_zns); 2132 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2133 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2134 if (!ctrlr->cdata_zns) { 2135 rc = -ENOMEM; 2136 goto error; 2137 } 2138 2139 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2140 ctrlr->opts.admin_timeout_ms); 2141 2142 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2143 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2144 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2145 if (rc != 0) { 2146 goto error; 2147 } 2148 2149 return 0; 2150 2151 error: 2152 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2153 nvme_ctrlr_free_zns_specific_data(ctrlr); 2154 return rc; 2155 } 2156 2157 enum nvme_active_ns_state { 2158 NVME_ACTIVE_NS_STATE_IDLE, 2159 NVME_ACTIVE_NS_STATE_PROCESSING, 2160 NVME_ACTIVE_NS_STATE_DONE, 2161 NVME_ACTIVE_NS_STATE_ERROR 2162 }; 2163 2164 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2165 2166 struct nvme_active_ns_ctx { 2167 struct spdk_nvme_ctrlr *ctrlr; 2168 uint32_t page_count; 2169 uint32_t next_nsid; 2170 uint32_t *new_ns_list; 2171 nvme_active_ns_ctx_deleter deleter; 2172 2173 enum nvme_active_ns_state state; 2174 }; 2175 2176 static struct nvme_active_ns_ctx * 2177 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2178 { 2179 struct nvme_active_ns_ctx *ctx; 2180 uint32_t *new_ns_list = NULL; 2181 2182 ctx = calloc(1, sizeof(*ctx)); 2183 if (!ctx) { 2184 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2185 return NULL; 2186 } 2187 2188 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2189 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2190 if (!new_ns_list) { 2191 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2192 free(ctx); 2193 return NULL; 2194 } 2195 2196 ctx->page_count = 1; 2197 ctx->new_ns_list = new_ns_list; 2198 ctx->ctrlr = ctrlr; 2199 ctx->deleter = deleter; 2200 2201 return ctx; 2202 } 2203 2204 static void 2205 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2206 { 2207 spdk_free(ctx->new_ns_list); 2208 free(ctx); 2209 } 2210 2211 static int 2212 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2213 { 2214 struct spdk_nvme_ns tmp, *ns; 2215 2216 assert(ctrlr != NULL); 2217 2218 tmp.id = nsid; 2219 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2220 if (ns == NULL) { 2221 return -EINVAL; 2222 } 2223 2224 nvme_ns_destruct(ns); 2225 ns->active = false; 2226 2227 return 0; 2228 } 2229 2230 static int 2231 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2232 { 2233 struct spdk_nvme_ns *ns; 2234 2235 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2236 return -EINVAL; 2237 } 2238 2239 /* Namespaces are constructed on demand, so simply request it. */ 2240 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2241 if (ns == NULL) { 2242 return -ENOMEM; 2243 } 2244 2245 ns->active = true; 2246 2247 return 0; 2248 } 2249 2250 static void 2251 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2252 size_t max_entries) 2253 { 2254 uint32_t active_ns_count = 0; 2255 size_t i; 2256 uint32_t nsid; 2257 struct spdk_nvme_ns *ns, *tmp_ns; 2258 int rc; 2259 2260 /* First, remove namespaces that no longer exist */ 2261 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2262 nsid = new_ns_list[0]; 2263 active_ns_count = 0; 2264 while (nsid != 0) { 2265 if (nsid == ns->id) { 2266 break; 2267 } 2268 2269 nsid = new_ns_list[active_ns_count++]; 2270 } 2271 2272 if (nsid != ns->id) { 2273 /* Did not find this namespace id in the new list. */ 2274 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2275 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2276 } 2277 } 2278 2279 /* Next, add new namespaces */ 2280 active_ns_count = 0; 2281 for (i = 0; i < max_entries; i++) { 2282 nsid = new_ns_list[active_ns_count]; 2283 2284 if (nsid == 0) { 2285 break; 2286 } 2287 2288 /* If the namespace already exists, this will not construct it a second time. */ 2289 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2290 if (rc != 0) { 2291 /* We can't easily handle a failure here. But just move on. */ 2292 assert(false); 2293 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2294 continue; 2295 } 2296 2297 active_ns_count++; 2298 } 2299 2300 ctrlr->active_ns_count = active_ns_count; 2301 } 2302 2303 static void 2304 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2305 { 2306 struct nvme_active_ns_ctx *ctx = arg; 2307 uint32_t *new_ns_list = NULL; 2308 2309 if (spdk_nvme_cpl_is_error(cpl)) { 2310 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2311 goto out; 2312 } 2313 2314 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2315 if (ctx->next_nsid == 0) { 2316 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2317 goto out; 2318 } 2319 2320 ctx->page_count++; 2321 new_ns_list = spdk_realloc(ctx->new_ns_list, 2322 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2323 ctx->ctrlr->page_size); 2324 if (!new_ns_list) { 2325 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2326 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2327 goto out; 2328 } 2329 2330 ctx->new_ns_list = new_ns_list; 2331 nvme_ctrlr_identify_active_ns_async(ctx); 2332 return; 2333 2334 out: 2335 if (ctx->deleter) { 2336 ctx->deleter(ctx); 2337 } 2338 } 2339 2340 static void 2341 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2342 { 2343 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2344 uint32_t i; 2345 int rc; 2346 2347 if (ctrlr->cdata.nn == 0) { 2348 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2349 goto out; 2350 } 2351 2352 assert(ctx->new_ns_list != NULL); 2353 2354 /* 2355 * If controller doesn't support active ns list CNS 0x02 dummy up 2356 * an active ns list, i.e. all namespaces report as active 2357 */ 2358 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2359 uint32_t *new_ns_list; 2360 2361 /* 2362 * Active NS list must always end with zero element. 2363 * So, we allocate for cdata.nn+1. 2364 */ 2365 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2366 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2367 new_ns_list = spdk_realloc(ctx->new_ns_list, 2368 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2369 ctx->ctrlr->page_size); 2370 if (!new_ns_list) { 2371 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2372 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2373 goto out; 2374 } 2375 2376 ctx->new_ns_list = new_ns_list; 2377 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2378 for (i = 0; i < ctrlr->cdata.nn; i++) { 2379 ctx->new_ns_list[i] = i + 1; 2380 } 2381 2382 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2383 goto out; 2384 } 2385 2386 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2387 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2388 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2389 nvme_ctrlr_identify_active_ns_async_done, ctx); 2390 if (rc != 0) { 2391 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2392 goto out; 2393 } 2394 2395 return; 2396 2397 out: 2398 if (ctx->deleter) { 2399 ctx->deleter(ctx); 2400 } 2401 } 2402 2403 static void 2404 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2405 { 2406 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2407 struct spdk_nvme_ns *ns; 2408 2409 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2410 nvme_active_ns_ctx_destroy(ctx); 2411 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2412 return; 2413 } 2414 2415 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2416 2417 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2418 nvme_ns_free_iocs_specific_data(ns); 2419 } 2420 2421 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2422 nvme_active_ns_ctx_destroy(ctx); 2423 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2424 } 2425 2426 static void 2427 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2428 { 2429 struct nvme_active_ns_ctx *ctx; 2430 2431 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2432 if (!ctx) { 2433 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2434 return; 2435 } 2436 2437 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2438 ctrlr->opts.admin_timeout_ms); 2439 nvme_ctrlr_identify_active_ns_async(ctx); 2440 } 2441 2442 int 2443 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2444 { 2445 struct nvme_active_ns_ctx *ctx; 2446 int rc; 2447 2448 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2449 if (!ctx) { 2450 return -ENOMEM; 2451 } 2452 2453 nvme_ctrlr_identify_active_ns_async(ctx); 2454 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2455 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2456 if (rc < 0) { 2457 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2458 break; 2459 } 2460 } 2461 2462 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2463 nvme_active_ns_ctx_destroy(ctx); 2464 return -ENXIO; 2465 } 2466 2467 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2468 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2469 nvme_active_ns_ctx_destroy(ctx); 2470 2471 return 0; 2472 } 2473 2474 static void 2475 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2476 { 2477 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2478 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2479 uint32_t nsid; 2480 int rc; 2481 2482 if (spdk_nvme_cpl_is_error(cpl)) { 2483 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2484 return; 2485 } 2486 2487 nvme_ns_set_identify_data(ns); 2488 2489 /* move on to the next active NS */ 2490 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2491 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2492 if (ns == NULL) { 2493 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2494 ctrlr->opts.admin_timeout_ms); 2495 return; 2496 } 2497 ns->ctrlr = ctrlr; 2498 ns->id = nsid; 2499 2500 rc = nvme_ctrlr_identify_ns_async(ns); 2501 if (rc) { 2502 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2503 } 2504 } 2505 2506 static int 2507 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2508 { 2509 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2510 struct spdk_nvme_ns_data *nsdata; 2511 2512 nsdata = &ns->nsdata; 2513 2514 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2515 ctrlr->opts.admin_timeout_ms); 2516 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2517 nsdata, sizeof(*nsdata), 2518 nvme_ctrlr_identify_ns_async_done, ns); 2519 } 2520 2521 static int 2522 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2523 { 2524 uint32_t nsid; 2525 struct spdk_nvme_ns *ns; 2526 int rc; 2527 2528 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2529 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2530 if (ns == NULL) { 2531 /* No active NS, move on to the next state */ 2532 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2533 ctrlr->opts.admin_timeout_ms); 2534 return 0; 2535 } 2536 2537 ns->ctrlr = ctrlr; 2538 ns->id = nsid; 2539 2540 rc = nvme_ctrlr_identify_ns_async(ns); 2541 if (rc) { 2542 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2543 } 2544 2545 return rc; 2546 } 2547 2548 static int 2549 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2550 { 2551 uint32_t nsid; 2552 struct spdk_nvme_ns *ns; 2553 int rc; 2554 2555 if (!prev_nsid) { 2556 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2557 } else { 2558 /* move on to the next active NS */ 2559 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2560 } 2561 2562 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2563 if (ns == NULL) { 2564 /* No first/next active NS, move on to the next state */ 2565 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2566 ctrlr->opts.admin_timeout_ms); 2567 return 0; 2568 } 2569 2570 /* loop until we find a ns which has (supported) iocs specific data */ 2571 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2572 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2573 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2574 if (ns == NULL) { 2575 /* no namespace with (supported) iocs specific data found */ 2576 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2577 ctrlr->opts.admin_timeout_ms); 2578 return 0; 2579 } 2580 } 2581 2582 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2583 if (rc) { 2584 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2585 } 2586 2587 return rc; 2588 } 2589 2590 static void 2591 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2592 { 2593 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2594 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2595 2596 if (spdk_nvme_cpl_is_error(cpl)) { 2597 nvme_ns_free_zns_specific_data(ns); 2598 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2599 return; 2600 } 2601 2602 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2603 } 2604 2605 static int 2606 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2607 { 2608 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2609 int rc; 2610 2611 switch (ns->csi) { 2612 case SPDK_NVME_CSI_ZNS: 2613 break; 2614 default: 2615 /* 2616 * This switch must handle all cases for which 2617 * nvme_ns_has_supported_iocs_specific_data() returns true, 2618 * other cases should never happen. 2619 */ 2620 assert(0); 2621 } 2622 2623 assert(!ns->nsdata_zns); 2624 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2625 SPDK_MALLOC_SHARE); 2626 if (!ns->nsdata_zns) { 2627 return -ENOMEM; 2628 } 2629 2630 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2631 ctrlr->opts.admin_timeout_ms); 2632 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2633 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2634 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2635 if (rc) { 2636 nvme_ns_free_zns_specific_data(ns); 2637 } 2638 2639 return rc; 2640 } 2641 2642 static int 2643 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2644 { 2645 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2646 /* Multi IOCS not supported/enabled, move on to the next state */ 2647 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2648 ctrlr->opts.admin_timeout_ms); 2649 return 0; 2650 } 2651 2652 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2653 } 2654 2655 static void 2656 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2657 { 2658 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2659 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2660 uint32_t nsid; 2661 int rc; 2662 2663 if (spdk_nvme_cpl_is_error(cpl)) { 2664 /* 2665 * Many controllers claim to be compatible with NVMe 1.3, however, 2666 * they do not implement NS ID Desc List. Therefore, instead of setting 2667 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2668 * error and move on to the next state. 2669 * 2670 * The proper way is to create a new quirk for controllers that violate 2671 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2672 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2673 * it is too generic and was added in order to handle controllers that 2674 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 2675 */ 2676 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2677 ctrlr->opts.admin_timeout_ms); 2678 return; 2679 } 2680 2681 nvme_ns_set_id_desc_list_data(ns); 2682 2683 /* move on to the next active NS */ 2684 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2685 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2686 if (ns == NULL) { 2687 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2688 ctrlr->opts.admin_timeout_ms); 2689 return; 2690 } 2691 2692 rc = nvme_ctrlr_identify_id_desc_async(ns); 2693 if (rc) { 2694 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2695 } 2696 } 2697 2698 static int 2699 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2700 { 2701 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2702 2703 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2704 2705 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2706 ctrlr->opts.admin_timeout_ms); 2707 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2708 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2709 nvme_ctrlr_identify_id_desc_async_done, ns); 2710 } 2711 2712 static int 2713 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2714 { 2715 uint32_t nsid; 2716 struct spdk_nvme_ns *ns; 2717 int rc; 2718 2719 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2720 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2721 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2722 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2723 /* NS ID Desc List not supported, move on to the next state */ 2724 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2725 ctrlr->opts.admin_timeout_ms); 2726 return 0; 2727 } 2728 2729 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2730 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2731 if (ns == NULL) { 2732 /* No active NS, move on to the next state */ 2733 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2734 ctrlr->opts.admin_timeout_ms); 2735 return 0; 2736 } 2737 2738 rc = nvme_ctrlr_identify_id_desc_async(ns); 2739 if (rc) { 2740 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2741 } 2742 2743 return rc; 2744 } 2745 2746 static void 2747 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2748 { 2749 if (spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 2750 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2751 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2752 ctrlr->cdata.nvmf_specific.ioccsz); 2753 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2754 assert(0); 2755 } 2756 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2757 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2758 } 2759 } 2760 2761 static void 2762 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2763 { 2764 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2765 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2766 2767 if (spdk_nvme_cpl_is_error(cpl)) { 2768 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2769 ctrlr->opts.num_io_queues = 0; 2770 } else { 2771 /* 2772 * Data in cdw0 is 0-based. 2773 * Lower 16-bits indicate number of submission queues allocated. 2774 * Upper 16-bits indicate number of completion queues allocated. 2775 */ 2776 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2777 cq_allocated = (cpl->cdw0 >> 16) + 1; 2778 2779 /* 2780 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2781 * submission and completion queues. 2782 */ 2783 min_allocated = spdk_min(sq_allocated, cq_allocated); 2784 2785 /* Set number of queues to be minimum of requested and actually allocated. */ 2786 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2787 } 2788 2789 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2790 if (ctrlr->free_io_qids == NULL) { 2791 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2792 return; 2793 } 2794 2795 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2796 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2797 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2798 } 2799 2800 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2801 ctrlr->opts.admin_timeout_ms); 2802 } 2803 2804 static int 2805 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2806 { 2807 int rc; 2808 2809 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2810 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2811 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2812 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2813 } else if (ctrlr->opts.num_io_queues < 1) { 2814 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2815 ctrlr->opts.num_io_queues = 1; 2816 } 2817 2818 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2819 ctrlr->opts.admin_timeout_ms); 2820 2821 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2822 nvme_ctrlr_set_num_queues_done, ctrlr); 2823 if (rc != 0) { 2824 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2825 return rc; 2826 } 2827 2828 return 0; 2829 } 2830 2831 static void 2832 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2833 { 2834 uint32_t keep_alive_interval_us; 2835 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2836 2837 if (spdk_nvme_cpl_is_error(cpl)) { 2838 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2839 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2840 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2841 } else { 2842 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2843 cpl->status.sc, cpl->status.sct); 2844 ctrlr->opts.keep_alive_timeout_ms = 0; 2845 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2846 return; 2847 } 2848 } else { 2849 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2850 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2851 cpl->cdw0); 2852 } 2853 2854 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2855 } 2856 2857 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2858 ctrlr->keep_alive_interval_ticks = 0; 2859 } else { 2860 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2861 2862 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2863 2864 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2865 UINT64_C(1000000); 2866 2867 /* Schedule the first Keep Alive to be sent as soon as possible. */ 2868 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2869 } 2870 2871 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2872 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2873 } else { 2874 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2875 ctrlr->opts.admin_timeout_ms); 2876 } 2877 } 2878 2879 static int 2880 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2881 { 2882 int rc; 2883 2884 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2885 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2886 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2887 } else { 2888 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2889 ctrlr->opts.admin_timeout_ms); 2890 } 2891 return 0; 2892 } 2893 2894 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2895 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2896 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2897 ctrlr->opts.keep_alive_timeout_ms = 0; 2898 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2899 ctrlr->opts.admin_timeout_ms); 2900 return 0; 2901 } 2902 2903 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2904 ctrlr->opts.admin_timeout_ms); 2905 2906 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2907 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2908 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2909 if (rc != 0) { 2910 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2911 ctrlr->opts.keep_alive_timeout_ms = 0; 2912 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2913 return rc; 2914 } 2915 2916 return 0; 2917 } 2918 2919 static void 2920 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2921 { 2922 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2923 2924 if (spdk_nvme_cpl_is_error(cpl)) { 2925 /* 2926 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2927 * is optional. 2928 */ 2929 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2930 cpl->status.sc, cpl->status.sct); 2931 } else { 2932 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2933 } 2934 2935 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2936 } 2937 2938 static int 2939 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2940 { 2941 uint8_t *host_id; 2942 uint32_t host_id_size; 2943 int rc; 2944 2945 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2946 /* 2947 * NVMe-oF sends the host ID during Connect and doesn't allow 2948 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 2949 */ 2950 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2951 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2952 return 0; 2953 } 2954 2955 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2956 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2957 host_id = ctrlr->opts.extended_host_id; 2958 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2959 } else { 2960 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2961 host_id = ctrlr->opts.host_id; 2962 host_id_size = sizeof(ctrlr->opts.host_id); 2963 } 2964 2965 /* If the user specified an all-zeroes host identifier, don't send the command. */ 2966 if (spdk_mem_all_zero(host_id, host_id_size)) { 2967 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2968 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2969 return 0; 2970 } 2971 2972 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2973 2974 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2975 ctrlr->opts.admin_timeout_ms); 2976 2977 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2978 if (rc != 0) { 2979 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2980 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2981 return rc; 2982 } 2983 2984 return 0; 2985 } 2986 2987 void 2988 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2989 { 2990 uint32_t nsid; 2991 struct spdk_nvme_ns *ns; 2992 2993 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2994 nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 2995 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2996 nvme_ns_destruct(ns); 2997 nvme_ns_construct(ns, nsid, ctrlr); 2998 } 2999 } 3000 3001 static int 3002 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 3003 { 3004 struct nvme_completion_poll_status *status; 3005 int rc = -ENOMEM; 3006 char *buffer = NULL; 3007 uint32_t nsid; 3008 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 3009 3010 if (ctrlr->opts.disable_read_changed_ns_list_log_page) { 3011 return 0; 3012 } 3013 3014 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 3015 if (!buffer) { 3016 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 3017 "changed ns log.\n"); 3018 return rc; 3019 } 3020 3021 status = calloc(1, sizeof(*status)); 3022 if (!status) { 3023 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3024 goto free_buffer; 3025 } 3026 3027 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3028 SPDK_NVME_LOG_CHANGED_NS_LIST, 3029 SPDK_NVME_GLOBAL_NS_TAG, 3030 buffer, buf_size, 0, 3031 nvme_completion_poll_cb, status); 3032 3033 if (rc) { 3034 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3035 free(status); 3036 goto free_buffer; 3037 } 3038 3039 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3040 ctrlr->opts.admin_timeout_ms * 1000); 3041 if (!status->timed_out) { 3042 free(status); 3043 } 3044 3045 if (rc) { 3046 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3047 goto free_buffer; 3048 } 3049 3050 /* only check the case of overflow. */ 3051 nsid = from_le32(buffer); 3052 if (nsid == 0xffffffffu) { 3053 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3054 } 3055 3056 free_buffer: 3057 spdk_dma_free(buffer); 3058 return rc; 3059 } 3060 3061 void 3062 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3063 const struct spdk_nvme_cpl *cpl) 3064 { 3065 union spdk_nvme_async_event_completion event; 3066 struct spdk_nvme_ctrlr_process *active_proc; 3067 int rc; 3068 3069 event.raw = cpl->cdw0; 3070 3071 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3072 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3073 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3074 3075 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3076 if (rc) { 3077 return; 3078 } 3079 nvme_ctrlr_update_namespaces(ctrlr); 3080 nvme_io_msg_ctrlr_update(ctrlr); 3081 } 3082 3083 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3084 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3085 if (!ctrlr->opts.disable_read_ana_log_page) { 3086 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3087 if (rc) { 3088 return; 3089 } 3090 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3091 ctrlr); 3092 } 3093 } 3094 3095 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3096 if (active_proc && active_proc->aer_cb_fn) { 3097 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3098 } 3099 } 3100 3101 static void 3102 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3103 const struct spdk_nvme_cpl *cpl) 3104 { 3105 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3106 struct spdk_nvme_ctrlr_process *proc; 3107 3108 /* Add async event to each process objects event list */ 3109 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3110 /* Must be shared memory so other processes can access */ 3111 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3112 if (!nvme_event) { 3113 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3114 return; 3115 } 3116 nvme_event->cpl = *cpl; 3117 3118 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3119 } 3120 } 3121 3122 void 3123 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3124 { 3125 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3126 struct spdk_nvme_ctrlr_process *active_proc; 3127 3128 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3129 3130 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3131 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3132 spdk_nvme_ctrlr_aer_completion_list, link); 3133 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3134 spdk_free(nvme_event); 3135 3136 } 3137 } 3138 3139 static void 3140 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3141 { 3142 struct nvme_async_event_request *aer = arg; 3143 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3144 3145 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3146 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3147 /* 3148 * This is simulated when controller is being shut down, to 3149 * effectively abort outstanding asynchronous event requests 3150 * and make sure all memory is freed. Do not repost the 3151 * request in this case. 3152 */ 3153 return; 3154 } 3155 3156 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3157 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3158 /* 3159 * SPDK will only send as many AERs as the device says it supports, 3160 * so this status code indicates an out-of-spec device. Do not repost 3161 * the request in this case. 3162 */ 3163 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3164 "handling. Do not repost this AER.\n"); 3165 return; 3166 } 3167 3168 /* Add the events to the list */ 3169 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3170 3171 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3172 if (ctrlr->is_removed || ctrlr->is_destructed) { 3173 return; 3174 } 3175 3176 /* 3177 * Repost another asynchronous event request to replace the one 3178 * that just completed. 3179 */ 3180 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3181 /* 3182 * We can't do anything to recover from a failure here, 3183 * so just print a warning message and leave the AER unsubmitted. 3184 */ 3185 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3186 } 3187 } 3188 3189 static int 3190 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3191 struct nvme_async_event_request *aer) 3192 { 3193 struct nvme_request *req; 3194 3195 aer->ctrlr = ctrlr; 3196 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3197 aer->req = req; 3198 if (req == NULL) { 3199 return -1; 3200 } 3201 3202 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3203 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3204 } 3205 3206 static void 3207 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3208 { 3209 struct nvme_async_event_request *aer; 3210 int rc; 3211 uint32_t i; 3212 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3213 3214 if (spdk_nvme_cpl_is_error(cpl)) { 3215 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3216 ctrlr->num_aers = 0; 3217 } else { 3218 /* aerl is a zero-based value, so we need to add 1 here. */ 3219 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3220 } 3221 3222 for (i = 0; i < ctrlr->num_aers; i++) { 3223 aer = &ctrlr->aer[i]; 3224 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3225 if (rc) { 3226 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3227 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3228 return; 3229 } 3230 } 3231 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3232 } 3233 3234 static int 3235 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3236 { 3237 union spdk_nvme_feat_async_event_configuration config; 3238 int rc; 3239 3240 config.raw = 0; 3241 3242 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3243 config.bits.discovery_log_change_notice = 1; 3244 } else { 3245 config.bits.crit_warn.bits.available_spare = 1; 3246 config.bits.crit_warn.bits.temperature = 1; 3247 config.bits.crit_warn.bits.device_reliability = 1; 3248 config.bits.crit_warn.bits.read_only = 1; 3249 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3250 3251 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3252 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3253 config.bits.ns_attr_notice = 1; 3254 } 3255 if (ctrlr->cdata.oaes.fw_activation_notices) { 3256 config.bits.fw_activation_notice = 1; 3257 } 3258 if (ctrlr->cdata.oaes.ana_change_notices) { 3259 config.bits.ana_change_notice = 1; 3260 } 3261 } 3262 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3263 config.bits.telemetry_log_notice = 1; 3264 } 3265 } 3266 3267 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3268 ctrlr->opts.admin_timeout_ms); 3269 3270 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3271 nvme_ctrlr_configure_aer_done, 3272 ctrlr); 3273 if (rc != 0) { 3274 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3275 return rc; 3276 } 3277 3278 return 0; 3279 } 3280 3281 struct spdk_nvme_ctrlr_process * 3282 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3283 { 3284 struct spdk_nvme_ctrlr_process *active_proc; 3285 3286 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3287 if (active_proc->pid == pid) { 3288 return active_proc; 3289 } 3290 } 3291 3292 return NULL; 3293 } 3294 3295 struct spdk_nvme_ctrlr_process * 3296 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3297 { 3298 return nvme_ctrlr_get_process(ctrlr, getpid()); 3299 } 3300 3301 /** 3302 * This function will be called when a process is using the controller. 3303 * 1. For the primary process, it is called when constructing the controller. 3304 * 2. For the secondary process, it is called at probing the controller. 3305 * Note: will check whether the process is already added for the same process. 3306 */ 3307 int 3308 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3309 { 3310 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3311 pid_t pid = getpid(); 3312 3313 /* Check whether the process is already added or not */ 3314 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3315 return 0; 3316 } 3317 3318 /* Initialize the per process properties for this ctrlr */ 3319 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3320 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3321 if (ctrlr_proc == NULL) { 3322 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3323 3324 return -1; 3325 } 3326 3327 ctrlr_proc->is_primary = spdk_process_is_primary(); 3328 ctrlr_proc->pid = pid; 3329 STAILQ_INIT(&ctrlr_proc->active_reqs); 3330 ctrlr_proc->devhandle = devhandle; 3331 ctrlr_proc->ref = 0; 3332 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3333 STAILQ_INIT(&ctrlr_proc->async_events); 3334 3335 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3336 3337 return 0; 3338 } 3339 3340 /** 3341 * This function will be called when the process detaches the controller. 3342 * Note: the ctrlr_lock must be held when calling this function. 3343 */ 3344 static void 3345 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3346 struct spdk_nvme_ctrlr_process *proc) 3347 { 3348 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3349 3350 assert(STAILQ_EMPTY(&proc->active_reqs)); 3351 3352 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3353 spdk_nvme_ctrlr_free_io_qpair(qpair); 3354 } 3355 3356 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3357 3358 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3359 spdk_pci_device_detach(proc->devhandle); 3360 } 3361 3362 spdk_free(proc); 3363 } 3364 3365 /** 3366 * This function will be called when the process exited unexpectedly 3367 * in order to free any incomplete nvme request, allocated IO qpairs 3368 * and allocated memory. 3369 * Note: the ctrlr_lock must be held when calling this function. 3370 */ 3371 static void 3372 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3373 { 3374 struct nvme_request *req, *tmp_req; 3375 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3376 struct spdk_nvme_ctrlr_aer_completion_list *event; 3377 3378 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3379 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3380 3381 assert(req->pid == proc->pid); 3382 if (req->user_buffer && req->payload_size) { 3383 spdk_free(req->payload.contig_or_cb_arg); 3384 } 3385 nvme_free_request(req); 3386 } 3387 3388 /* Remove async event from each process objects event list */ 3389 while (!STAILQ_EMPTY(&proc->async_events)) { 3390 event = STAILQ_FIRST(&proc->async_events); 3391 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3392 spdk_free(event); 3393 } 3394 3395 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3396 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3397 3398 /* 3399 * The process may have been killed while some qpairs were in their 3400 * completion context. Clear that flag here to allow these IO 3401 * qpairs to be deleted. 3402 */ 3403 qpair->in_completion_context = 0; 3404 3405 qpair->no_deletion_notification_needed = 1; 3406 3407 spdk_nvme_ctrlr_free_io_qpair(qpair); 3408 } 3409 3410 spdk_free(proc); 3411 } 3412 3413 /** 3414 * This function will be called when destructing the controller. 3415 * 1. There is no more admin request on this controller. 3416 * 2. Clean up any left resource allocation when its associated process is gone. 3417 */ 3418 void 3419 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3420 { 3421 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3422 3423 /* Free all the processes' properties and make sure no pending admin IOs */ 3424 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3425 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3426 3427 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3428 3429 spdk_free(active_proc); 3430 } 3431 } 3432 3433 /** 3434 * This function will be called when any other process attaches or 3435 * detaches the controller in order to cleanup those unexpectedly 3436 * terminated processes. 3437 * Note: the ctrlr_lock must be held when calling this function. 3438 */ 3439 static int 3440 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3441 { 3442 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3443 int active_proc_count = 0; 3444 3445 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3446 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3447 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3448 3449 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3450 3451 nvme_ctrlr_cleanup_process(active_proc); 3452 } else { 3453 active_proc_count++; 3454 } 3455 } 3456 3457 return active_proc_count; 3458 } 3459 3460 void 3461 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3462 { 3463 struct spdk_nvme_ctrlr_process *active_proc; 3464 3465 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3466 3467 nvme_ctrlr_remove_inactive_proc(ctrlr); 3468 3469 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3470 if (active_proc) { 3471 active_proc->ref++; 3472 } 3473 3474 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3475 } 3476 3477 void 3478 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3479 { 3480 struct spdk_nvme_ctrlr_process *active_proc; 3481 int proc_count; 3482 3483 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3484 3485 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3486 3487 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3488 if (active_proc) { 3489 active_proc->ref--; 3490 assert(active_proc->ref >= 0); 3491 3492 /* 3493 * The last active process will be removed at the end of 3494 * the destruction of the controller. 3495 */ 3496 if (active_proc->ref == 0 && proc_count != 1) { 3497 nvme_ctrlr_remove_process(ctrlr, active_proc); 3498 } 3499 } 3500 3501 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3502 } 3503 3504 int 3505 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3506 { 3507 struct spdk_nvme_ctrlr_process *active_proc; 3508 int ref = 0; 3509 3510 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3511 3512 nvme_ctrlr_remove_inactive_proc(ctrlr); 3513 3514 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3515 ref += active_proc->ref; 3516 } 3517 3518 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3519 3520 return ref; 3521 } 3522 3523 /** 3524 * Get the PCI device handle which is only visible to its associated process. 3525 */ 3526 struct spdk_pci_device * 3527 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3528 { 3529 struct spdk_nvme_ctrlr_process *active_proc; 3530 struct spdk_pci_device *devhandle = NULL; 3531 3532 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3533 3534 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3535 if (active_proc) { 3536 devhandle = active_proc->devhandle; 3537 } 3538 3539 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3540 3541 return devhandle; 3542 } 3543 3544 static void 3545 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3546 { 3547 struct spdk_nvme_ctrlr *ctrlr = ctx; 3548 3549 if (spdk_nvme_cpl_is_error(cpl)) { 3550 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3551 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3552 return; 3553 } 3554 3555 assert(value <= UINT32_MAX); 3556 ctrlr->vs.raw = (uint32_t)value; 3557 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3558 } 3559 3560 static void 3561 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3562 { 3563 struct spdk_nvme_ctrlr *ctrlr = ctx; 3564 3565 if (spdk_nvme_cpl_is_error(cpl)) { 3566 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3567 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3568 return; 3569 } 3570 3571 ctrlr->cap.raw = value; 3572 nvme_ctrlr_init_cap(ctrlr); 3573 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3574 } 3575 3576 static void 3577 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3578 { 3579 struct spdk_nvme_ctrlr *ctrlr = ctx; 3580 enum nvme_ctrlr_state state; 3581 3582 if (spdk_nvme_cpl_is_error(cpl)) { 3583 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3584 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3585 return; 3586 } 3587 3588 assert(value <= UINT32_MAX); 3589 ctrlr->process_init_cc.raw = (uint32_t)value; 3590 3591 if (ctrlr->process_init_cc.bits.en) { 3592 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3593 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3594 } else { 3595 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3596 } 3597 3598 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3599 } 3600 3601 static void 3602 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3603 { 3604 struct spdk_nvme_ctrlr *ctrlr = ctx; 3605 3606 if (spdk_nvme_cpl_is_error(cpl)) { 3607 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3608 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3609 return; 3610 } 3611 3612 /* 3613 * Wait 2.5 seconds before accessing PCI registers. 3614 * Not using sleep() to avoid blocking other controller's initialization. 3615 */ 3616 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3617 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3618 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3619 } 3620 3621 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3622 nvme_ctrlr_get_ready_timeout(ctrlr)); 3623 } 3624 3625 static void 3626 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3627 { 3628 struct spdk_nvme_ctrlr *ctrlr = ctx; 3629 union spdk_nvme_cc_register cc; 3630 int rc; 3631 3632 if (spdk_nvme_cpl_is_error(cpl)) { 3633 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3634 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3635 return; 3636 } 3637 3638 assert(value <= UINT32_MAX); 3639 cc.raw = (uint32_t)value; 3640 cc.bits.en = 0; 3641 ctrlr->process_init_cc.raw = cc.raw; 3642 3643 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3644 nvme_ctrlr_get_ready_timeout(ctrlr)); 3645 3646 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3647 if (rc != 0) { 3648 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3649 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3650 } 3651 } 3652 3653 static void 3654 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3655 { 3656 struct spdk_nvme_ctrlr *ctrlr = ctx; 3657 union spdk_nvme_csts_register csts; 3658 3659 if (spdk_nvme_cpl_is_error(cpl)) { 3660 /* While a device is resetting, it may be unable to service MMIO reads 3661 * temporarily. Allow for this case. 3662 */ 3663 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3664 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3665 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3666 NVME_TIMEOUT_KEEP_EXISTING); 3667 } else { 3668 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3669 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3670 } 3671 3672 return; 3673 } 3674 3675 assert(value <= UINT32_MAX); 3676 csts.raw = (uint32_t)value; 3677 if (csts.bits.rdy == 1 || csts.bits.cfs == 1) { 3678 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3679 nvme_ctrlr_get_ready_timeout(ctrlr)); 3680 } else { 3681 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3682 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3683 NVME_TIMEOUT_KEEP_EXISTING); 3684 } 3685 } 3686 3687 static void 3688 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3689 { 3690 struct spdk_nvme_ctrlr *ctrlr = ctx; 3691 union spdk_nvme_csts_register csts; 3692 3693 if (spdk_nvme_cpl_is_error(cpl)) { 3694 /* While a device is resetting, it may be unable to service MMIO reads 3695 * temporarily. Allow for this case. 3696 */ 3697 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3698 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3699 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3700 NVME_TIMEOUT_KEEP_EXISTING); 3701 } else { 3702 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3703 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3704 } 3705 3706 return; 3707 } 3708 3709 assert(value <= UINT32_MAX); 3710 csts.raw = (uint32_t)value; 3711 if (csts.bits.rdy == 0) { 3712 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3713 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED, 3714 nvme_ctrlr_get_ready_timeout(ctrlr)); 3715 } else { 3716 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3717 NVME_TIMEOUT_KEEP_EXISTING); 3718 } 3719 } 3720 3721 static void 3722 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3723 const struct spdk_nvme_cpl *cpl) 3724 { 3725 struct spdk_nvme_ctrlr *ctrlr = ctx; 3726 union spdk_nvme_csts_register csts; 3727 3728 if (spdk_nvme_cpl_is_error(cpl)) { 3729 /* While a device is resetting, it may be unable to service MMIO reads 3730 * temporarily. Allow for this case. 3731 */ 3732 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3733 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3734 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3735 NVME_TIMEOUT_KEEP_EXISTING); 3736 } else { 3737 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3738 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3739 } 3740 3741 return; 3742 } 3743 3744 assert(value <= UINT32_MAX); 3745 csts.raw = value; 3746 if (csts.bits.rdy == 1) { 3747 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3748 /* 3749 * The controller has been enabled. 3750 * Perform the rest of initialization serially. 3751 */ 3752 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3753 ctrlr->opts.admin_timeout_ms); 3754 } else { 3755 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3756 NVME_TIMEOUT_KEEP_EXISTING); 3757 } 3758 } 3759 3760 /** 3761 * This function will be called repeatedly during initialization until the controller is ready. 3762 */ 3763 int 3764 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3765 { 3766 uint32_t ready_timeout_in_ms; 3767 uint64_t ticks; 3768 int rc = 0; 3769 3770 ticks = spdk_get_ticks(); 3771 3772 /* 3773 * May need to avoid accessing any register on the target controller 3774 * for a while. Return early without touching the FSM. 3775 * Check sleep_timeout_tsc > 0 for unit test. 3776 */ 3777 if ((ctrlr->sleep_timeout_tsc > 0) && 3778 (ticks <= ctrlr->sleep_timeout_tsc)) { 3779 return 0; 3780 } 3781 ctrlr->sleep_timeout_tsc = 0; 3782 3783 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3784 3785 /* 3786 * Check if the current initialization step is done or has timed out. 3787 */ 3788 switch (ctrlr->state) { 3789 case NVME_CTRLR_STATE_INIT_DELAY: 3790 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3791 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3792 /* 3793 * Controller may need some delay before it's enabled. 3794 * 3795 * This is a workaround for an issue where the PCIe-attached NVMe controller 3796 * is not ready after VFIO reset. We delay the initialization rather than the 3797 * enabling itself, because this is required only for the very first enabling 3798 * - directly after a VFIO reset. 3799 */ 3800 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3801 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3802 } 3803 break; 3804 3805 case NVME_CTRLR_STATE_DISCONNECTED: 3806 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 3807 break; 3808 3809 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT and NVME_CTRLR_STATE_DISCONNECTED */ 3810 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3811 if (rc == 0) { 3812 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3813 NVME_TIMEOUT_INFINITE); 3814 } else { 3815 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3816 } 3817 break; 3818 3819 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3820 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3821 3822 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3823 case NVME_QPAIR_CONNECTING: 3824 break; 3825 case NVME_QPAIR_CONNECTED: 3826 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3827 /* Fall through */ 3828 case NVME_QPAIR_ENABLED: 3829 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3830 NVME_TIMEOUT_INFINITE); 3831 /* Abort any queued requests that were sent while the adminq was connecting 3832 * to avoid stalling the init process during a reset, as requests don't get 3833 * resubmitted while the controller is resetting and subsequent commands 3834 * would get queued too. 3835 */ 3836 nvme_qpair_abort_queued_reqs(ctrlr->adminq); 3837 break; 3838 case NVME_QPAIR_DISCONNECTING: 3839 assert(ctrlr->adminq->async == true); 3840 break; 3841 case NVME_QPAIR_DISCONNECTED: 3842 /* fallthrough */ 3843 default: 3844 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3845 break; 3846 } 3847 3848 break; 3849 3850 case NVME_CTRLR_STATE_READ_VS: 3851 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3852 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3853 break; 3854 3855 case NVME_CTRLR_STATE_READ_CAP: 3856 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3857 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3858 break; 3859 3860 case NVME_CTRLR_STATE_CHECK_EN: 3861 /* Begin the hardware initialization by making sure the controller is disabled. */ 3862 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3863 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3864 break; 3865 3866 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3867 /* 3868 * Controller is currently enabled. We need to disable it to cause a reset. 3869 * 3870 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3871 * Wait for the ready bit to be 1 before disabling the controller. 3872 */ 3873 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3874 NVME_TIMEOUT_KEEP_EXISTING); 3875 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3876 break; 3877 3878 case NVME_CTRLR_STATE_SET_EN_0: 3879 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3880 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3881 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3882 break; 3883 3884 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3885 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3886 NVME_TIMEOUT_KEEP_EXISTING); 3887 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3888 break; 3889 3890 case NVME_CTRLR_STATE_DISABLED: 3891 if (ctrlr->is_disconnecting) { 3892 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n"); 3893 } else { 3894 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3895 3896 /* 3897 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3898 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3899 */ 3900 spdk_delay_us(100); 3901 } 3902 break; 3903 3904 case NVME_CTRLR_STATE_ENABLE: 3905 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3906 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3907 rc = nvme_ctrlr_enable(ctrlr); 3908 if (rc) { 3909 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d", rc); 3910 } 3911 return rc; 3912 3913 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3914 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3915 NVME_TIMEOUT_KEEP_EXISTING); 3916 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3917 ctrlr); 3918 break; 3919 3920 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3921 nvme_transport_qpair_reset(ctrlr->adminq); 3922 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3923 break; 3924 3925 case NVME_CTRLR_STATE_IDENTIFY: 3926 rc = nvme_ctrlr_identify(ctrlr); 3927 break; 3928 3929 case NVME_CTRLR_STATE_CONFIGURE_AER: 3930 rc = nvme_ctrlr_configure_aer(ctrlr); 3931 break; 3932 3933 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3934 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3935 break; 3936 3937 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3938 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3939 break; 3940 3941 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3942 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3943 break; 3944 3945 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3946 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3947 rc = nvme_ctrlr_set_num_queues(ctrlr); 3948 break; 3949 3950 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3951 _nvme_ctrlr_identify_active_ns(ctrlr); 3952 break; 3953 3954 case NVME_CTRLR_STATE_IDENTIFY_NS: 3955 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3956 break; 3957 3958 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3959 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3960 break; 3961 3962 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3963 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3964 break; 3965 3966 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3967 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3968 break; 3969 3970 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3971 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3972 break; 3973 3974 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3975 nvme_ctrlr_set_supported_features(ctrlr); 3976 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3977 ctrlr->opts.admin_timeout_ms); 3978 break; 3979 3980 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3981 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3982 break; 3983 3984 case NVME_CTRLR_STATE_SET_HOST_ID: 3985 rc = nvme_ctrlr_set_host_id(ctrlr); 3986 break; 3987 3988 case NVME_CTRLR_STATE_TRANSPORT_READY: 3989 rc = nvme_transport_ctrlr_ready(ctrlr); 3990 if (rc) { 3991 NVME_CTRLR_ERRLOG(ctrlr, "Transport controller ready step failed: rc %d\n", rc); 3992 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3993 } else { 3994 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 3995 } 3996 break; 3997 3998 case NVME_CTRLR_STATE_READY: 3999 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 4000 return 0; 4001 4002 case NVME_CTRLR_STATE_ERROR: 4003 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 4004 return -1; 4005 4006 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 4007 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 4008 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 4009 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 4010 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 4011 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 4012 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 4013 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 4014 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 4015 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 4016 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 4017 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 4018 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 4019 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 4020 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 4021 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 4022 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 4023 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 4024 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 4025 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 4026 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 4027 /* 4028 * nvme_ctrlr_process_init() may be called from the completion context 4029 * for the admin qpair. Avoid recursive calls for this case. 4030 */ 4031 if (!ctrlr->adminq->in_completion_context) { 4032 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4033 } 4034 break; 4035 4036 default: 4037 assert(0); 4038 return -1; 4039 } 4040 4041 if (rc) { 4042 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d (%s)\n", 4043 rc, ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4044 } 4045 4046 /* Note: we use the ticks captured when we entered this function. 4047 * This covers environments where the SPDK process gets swapped out after 4048 * we tried to advance the state but before we check the timeout here. 4049 * It is not normal for this to happen, but harmless to handle it in this 4050 * way. 4051 */ 4052 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4053 ticks > ctrlr->state_timeout_tsc) { 4054 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4055 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4056 return -1; 4057 } 4058 4059 return rc; 4060 } 4061 4062 int 4063 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4064 { 4065 pthread_mutexattr_t attr; 4066 int rc = 0; 4067 4068 if (pthread_mutexattr_init(&attr)) { 4069 return -1; 4070 } 4071 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4072 #ifndef __FreeBSD__ 4073 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4074 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4075 #endif 4076 pthread_mutex_init(mtx, &attr)) { 4077 rc = -1; 4078 } 4079 pthread_mutexattr_destroy(&attr); 4080 return rc; 4081 } 4082 4083 int 4084 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4085 { 4086 int rc; 4087 4088 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4089 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4090 } else { 4091 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4092 } 4093 4094 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4095 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4096 ctrlr->opts.admin_queue_size); 4097 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4098 } 4099 4100 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_ADMIN_QUEUE_SIZE && 4101 (ctrlr->opts.admin_queue_size % SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE) != 0) { 4102 NVME_CTRLR_ERRLOG(ctrlr, 4103 "admin_queue_size %u is invalid for this NVMe device, adjust to next multiple\n", 4104 ctrlr->opts.admin_queue_size); 4105 ctrlr->opts.admin_queue_size = SPDK_ALIGN_CEIL(ctrlr->opts.admin_queue_size, 4106 SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE); 4107 } 4108 4109 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4110 NVME_CTRLR_ERRLOG(ctrlr, 4111 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4112 ctrlr->opts.admin_queue_size); 4113 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4114 } 4115 4116 ctrlr->flags = 0; 4117 ctrlr->free_io_qids = NULL; 4118 ctrlr->is_resetting = false; 4119 ctrlr->is_failed = false; 4120 ctrlr->is_destructed = false; 4121 4122 TAILQ_INIT(&ctrlr->active_io_qpairs); 4123 STAILQ_INIT(&ctrlr->queued_aborts); 4124 ctrlr->outstanding_aborts = 0; 4125 4126 ctrlr->ana_log_page = NULL; 4127 ctrlr->ana_log_page_size = 0; 4128 4129 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4130 if (rc != 0) { 4131 return rc; 4132 } 4133 4134 TAILQ_INIT(&ctrlr->active_procs); 4135 STAILQ_INIT(&ctrlr->register_operations); 4136 4137 RB_INIT(&ctrlr->ns); 4138 4139 return rc; 4140 } 4141 4142 static void 4143 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4144 { 4145 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4146 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4147 } 4148 4149 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4150 4151 /* For now, always select page_size == min_page_size. */ 4152 ctrlr->page_size = ctrlr->min_page_size; 4153 4154 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4155 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4156 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4157 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4158 /* If the user specifically set an IO queue size different than the 4159 * default, use that value. Otherwise overwrite with the quirked value. 4160 * This allows this quirk to be overridden when necessary. 4161 * However, cap.mqes still needs to be respected. 4162 */ 4163 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4164 } 4165 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4166 4167 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4168 } 4169 4170 void 4171 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4172 { 4173 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4174 } 4175 4176 void 4177 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4178 struct nvme_ctrlr_detach_ctx *ctx) 4179 { 4180 struct spdk_nvme_qpair *qpair, *tmp; 4181 4182 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4183 4184 ctrlr->prepare_for_reset = false; 4185 ctrlr->is_destructed = true; 4186 4187 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4188 4189 nvme_ctrlr_abort_queued_aborts(ctrlr); 4190 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4191 4192 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4193 spdk_nvme_ctrlr_free_io_qpair(qpair); 4194 } 4195 4196 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4197 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4198 4199 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4200 } 4201 4202 int 4203 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4204 struct nvme_ctrlr_detach_ctx *ctx) 4205 { 4206 struct spdk_nvme_ns *ns, *tmp_ns; 4207 int rc = 0; 4208 4209 if (!ctx->shutdown_complete) { 4210 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4211 if (rc == -EAGAIN) { 4212 return -EAGAIN; 4213 } 4214 /* Destruct ctrlr forcefully for any other error. */ 4215 } 4216 4217 if (ctx->cb_fn) { 4218 ctx->cb_fn(ctrlr); 4219 } 4220 4221 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4222 4223 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4224 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4225 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4226 spdk_free(ns); 4227 } 4228 4229 ctrlr->active_ns_count = 0; 4230 4231 spdk_bit_array_free(&ctrlr->free_io_qids); 4232 4233 free(ctrlr->ana_log_page); 4234 free(ctrlr->copied_ana_desc); 4235 ctrlr->ana_log_page = NULL; 4236 ctrlr->copied_ana_desc = NULL; 4237 ctrlr->ana_log_page_size = 0; 4238 4239 nvme_transport_ctrlr_destruct(ctrlr); 4240 4241 return rc; 4242 } 4243 4244 void 4245 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4246 { 4247 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4248 int rc; 4249 4250 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4251 4252 while (1) { 4253 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4254 if (rc != -EAGAIN) { 4255 break; 4256 } 4257 nvme_delay(1000); 4258 } 4259 } 4260 4261 int 4262 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4263 struct nvme_request *req) 4264 { 4265 return nvme_qpair_submit_request(ctrlr->adminq, req); 4266 } 4267 4268 static void 4269 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4270 { 4271 /* Do nothing */ 4272 } 4273 4274 /* 4275 * Check if we need to send a Keep Alive command. 4276 * Caller must hold ctrlr->ctrlr_lock. 4277 */ 4278 static int 4279 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4280 { 4281 uint64_t now; 4282 struct nvme_request *req; 4283 struct spdk_nvme_cmd *cmd; 4284 int rc = 0; 4285 4286 now = spdk_get_ticks(); 4287 if (now < ctrlr->next_keep_alive_tick) { 4288 return rc; 4289 } 4290 4291 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4292 if (req == NULL) { 4293 return rc; 4294 } 4295 4296 cmd = &req->cmd; 4297 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4298 4299 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4300 if (rc != 0) { 4301 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4302 rc = -ENXIO; 4303 } 4304 4305 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4306 return rc; 4307 } 4308 4309 int32_t 4310 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4311 { 4312 int32_t num_completions; 4313 int32_t rc; 4314 struct spdk_nvme_ctrlr_process *active_proc; 4315 4316 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4317 4318 if (ctrlr->keep_alive_interval_ticks) { 4319 rc = nvme_ctrlr_keep_alive(ctrlr); 4320 if (rc) { 4321 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4322 return rc; 4323 } 4324 } 4325 4326 rc = nvme_io_msg_process(ctrlr); 4327 if (rc < 0) { 4328 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4329 return rc; 4330 } 4331 num_completions = rc; 4332 4333 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4334 4335 /* Each process has an async list, complete the ones for this process object */ 4336 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4337 if (active_proc) { 4338 nvme_ctrlr_complete_queued_async_events(ctrlr); 4339 } 4340 4341 if (rc == -ENXIO && ctrlr->is_disconnecting) { 4342 nvme_ctrlr_disconnect_done(ctrlr); 4343 } 4344 4345 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4346 4347 if (rc < 0) { 4348 num_completions = rc; 4349 } else { 4350 num_completions += rc; 4351 } 4352 4353 return num_completions; 4354 } 4355 4356 const struct spdk_nvme_ctrlr_data * 4357 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4358 { 4359 return &ctrlr->cdata; 4360 } 4361 4362 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4363 { 4364 union spdk_nvme_csts_register csts; 4365 4366 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4367 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4368 } 4369 return csts; 4370 } 4371 4372 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4373 { 4374 union spdk_nvme_cc_register cc; 4375 4376 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4377 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4378 } 4379 return cc; 4380 } 4381 4382 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4383 { 4384 return ctrlr->cap; 4385 } 4386 4387 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4388 { 4389 return ctrlr->vs; 4390 } 4391 4392 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4393 { 4394 union spdk_nvme_cmbsz_register cmbsz; 4395 4396 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4397 cmbsz.raw = 0; 4398 } 4399 4400 return cmbsz; 4401 } 4402 4403 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4404 { 4405 union spdk_nvme_pmrcap_register pmrcap; 4406 4407 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4408 pmrcap.raw = 0; 4409 } 4410 4411 return pmrcap; 4412 } 4413 4414 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4415 { 4416 union spdk_nvme_bpinfo_register bpinfo; 4417 4418 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4419 bpinfo.raw = 0; 4420 } 4421 4422 return bpinfo; 4423 } 4424 4425 uint64_t 4426 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4427 { 4428 return ctrlr->pmr_size; 4429 } 4430 4431 uint32_t 4432 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4433 { 4434 return ctrlr->cdata.nn; 4435 } 4436 4437 bool 4438 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4439 { 4440 struct spdk_nvme_ns tmp, *ns; 4441 4442 tmp.id = nsid; 4443 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4444 4445 if (ns != NULL) { 4446 return ns->active; 4447 } 4448 4449 return false; 4450 } 4451 4452 uint32_t 4453 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4454 { 4455 struct spdk_nvme_ns *ns; 4456 4457 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4458 if (ns == NULL) { 4459 return 0; 4460 } 4461 4462 while (ns != NULL) { 4463 if (ns->active) { 4464 return ns->id; 4465 } 4466 4467 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4468 } 4469 4470 return 0; 4471 } 4472 4473 uint32_t 4474 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4475 { 4476 struct spdk_nvme_ns tmp, *ns; 4477 4478 tmp.id = prev_nsid; 4479 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4480 if (ns == NULL) { 4481 return 0; 4482 } 4483 4484 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4485 while (ns != NULL) { 4486 if (ns->active) { 4487 return ns->id; 4488 } 4489 4490 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4491 } 4492 4493 return 0; 4494 } 4495 4496 struct spdk_nvme_ns * 4497 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4498 { 4499 struct spdk_nvme_ns tmp; 4500 struct spdk_nvme_ns *ns; 4501 4502 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4503 return NULL; 4504 } 4505 4506 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4507 4508 tmp.id = nsid; 4509 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4510 4511 if (ns == NULL) { 4512 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4513 if (ns == NULL) { 4514 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4515 return NULL; 4516 } 4517 4518 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4519 ns->id = nsid; 4520 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4521 } 4522 4523 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4524 4525 return ns; 4526 } 4527 4528 struct spdk_pci_device * 4529 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4530 { 4531 if (ctrlr == NULL) { 4532 return NULL; 4533 } 4534 4535 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4536 return NULL; 4537 } 4538 4539 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4540 } 4541 4542 uint32_t 4543 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4544 { 4545 return ctrlr->max_xfer_size; 4546 } 4547 4548 void 4549 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4550 spdk_nvme_aer_cb aer_cb_fn, 4551 void *aer_cb_arg) 4552 { 4553 struct spdk_nvme_ctrlr_process *active_proc; 4554 4555 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4556 4557 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4558 if (active_proc) { 4559 active_proc->aer_cb_fn = aer_cb_fn; 4560 active_proc->aer_cb_arg = aer_cb_arg; 4561 } 4562 4563 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4564 } 4565 4566 void 4567 spdk_nvme_ctrlr_disable_read_changed_ns_list_log_page(struct spdk_nvme_ctrlr *ctrlr) 4568 { 4569 ctrlr->opts.disable_read_changed_ns_list_log_page = true; 4570 } 4571 4572 void 4573 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4574 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4575 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4576 { 4577 struct spdk_nvme_ctrlr_process *active_proc; 4578 4579 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4580 4581 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4582 if (active_proc) { 4583 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4584 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4585 active_proc->timeout_cb_fn = cb_fn; 4586 active_proc->timeout_cb_arg = cb_arg; 4587 } 4588 4589 ctrlr->timeout_enabled = true; 4590 4591 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4592 } 4593 4594 bool 4595 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4596 { 4597 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ 4598 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4599 return ctrlr->log_page_supported[log_page]; 4600 } 4601 4602 bool 4603 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4604 { 4605 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4606 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4607 return ctrlr->feature_supported[feature_code]; 4608 } 4609 4610 int 4611 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4612 struct spdk_nvme_ctrlr_list *payload) 4613 { 4614 struct nvme_completion_poll_status *status; 4615 struct spdk_nvme_ns *ns; 4616 int res; 4617 4618 if (nsid == 0) { 4619 return -EINVAL; 4620 } 4621 4622 status = calloc(1, sizeof(*status)); 4623 if (!status) { 4624 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4625 return -ENOMEM; 4626 } 4627 4628 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4629 nvme_completion_poll_cb, status); 4630 if (res) { 4631 free(status); 4632 return res; 4633 } 4634 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4635 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n"); 4636 if (!status->timed_out) { 4637 free(status); 4638 } 4639 return -ENXIO; 4640 } 4641 free(status); 4642 4643 res = nvme_ctrlr_identify_active_ns(ctrlr); 4644 if (res) { 4645 return res; 4646 } 4647 4648 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4649 if (ns == NULL) { 4650 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_get_ns failed!\n"); 4651 return -ENXIO; 4652 } 4653 4654 return nvme_ns_construct(ns, nsid, ctrlr); 4655 } 4656 4657 int 4658 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4659 struct spdk_nvme_ctrlr_list *payload) 4660 { 4661 struct nvme_completion_poll_status *status; 4662 int res; 4663 4664 if (nsid == 0) { 4665 return -EINVAL; 4666 } 4667 4668 status = calloc(1, sizeof(*status)); 4669 if (!status) { 4670 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4671 return -ENOMEM; 4672 } 4673 4674 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4675 nvme_completion_poll_cb, status); 4676 if (res) { 4677 free(status); 4678 return res; 4679 } 4680 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4681 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4682 if (!status->timed_out) { 4683 free(status); 4684 } 4685 return -ENXIO; 4686 } 4687 free(status); 4688 4689 return nvme_ctrlr_identify_active_ns(ctrlr); 4690 } 4691 4692 uint32_t 4693 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4694 { 4695 struct nvme_completion_poll_status *status; 4696 int res; 4697 uint32_t nsid; 4698 4699 status = calloc(1, sizeof(*status)); 4700 if (!status) { 4701 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4702 return 0; 4703 } 4704 4705 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4706 if (res) { 4707 free(status); 4708 return 0; 4709 } 4710 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4711 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4712 if (!status->timed_out) { 4713 free(status); 4714 } 4715 return 0; 4716 } 4717 4718 nsid = status->cpl.cdw0; 4719 free(status); 4720 4721 assert(nsid > 0); 4722 4723 /* Return the namespace ID that was created */ 4724 return nsid; 4725 } 4726 4727 int 4728 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4729 { 4730 struct nvme_completion_poll_status *status; 4731 int res; 4732 4733 if (nsid == 0) { 4734 return -EINVAL; 4735 } 4736 4737 status = calloc(1, sizeof(*status)); 4738 if (!status) { 4739 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4740 return -ENOMEM; 4741 } 4742 4743 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4744 if (res) { 4745 free(status); 4746 return res; 4747 } 4748 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4749 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4750 if (!status->timed_out) { 4751 free(status); 4752 } 4753 return -ENXIO; 4754 } 4755 free(status); 4756 4757 return nvme_ctrlr_identify_active_ns(ctrlr); 4758 } 4759 4760 int 4761 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4762 struct spdk_nvme_format *format) 4763 { 4764 struct nvme_completion_poll_status *status; 4765 int res; 4766 4767 status = calloc(1, sizeof(*status)); 4768 if (!status) { 4769 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4770 return -ENOMEM; 4771 } 4772 4773 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4774 status); 4775 if (res) { 4776 free(status); 4777 return res; 4778 } 4779 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4780 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4781 if (!status->timed_out) { 4782 free(status); 4783 } 4784 return -ENXIO; 4785 } 4786 free(status); 4787 4788 return spdk_nvme_ctrlr_reset(ctrlr); 4789 } 4790 4791 int 4792 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4793 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4794 { 4795 struct spdk_nvme_fw_commit fw_commit; 4796 struct nvme_completion_poll_status *status; 4797 int res; 4798 unsigned int size_remaining; 4799 unsigned int offset; 4800 unsigned int transfer; 4801 uint8_t *p; 4802 4803 if (!completion_status) { 4804 return -EINVAL; 4805 } 4806 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4807 if (size % 4) { 4808 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4809 return -1; 4810 } 4811 4812 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4813 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4814 */ 4815 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4816 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4817 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4818 return -1; 4819 } 4820 4821 status = calloc(1, sizeof(*status)); 4822 if (!status) { 4823 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4824 return -ENOMEM; 4825 } 4826 4827 /* Firmware download */ 4828 size_remaining = size; 4829 offset = 0; 4830 p = payload; 4831 4832 while (size_remaining > 0) { 4833 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4834 4835 memset(status, 0, sizeof(*status)); 4836 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4837 nvme_completion_poll_cb, 4838 status); 4839 if (res) { 4840 free(status); 4841 return res; 4842 } 4843 4844 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4845 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4846 if (!status->timed_out) { 4847 free(status); 4848 } 4849 return -ENXIO; 4850 } 4851 p += transfer; 4852 offset += transfer; 4853 size_remaining -= transfer; 4854 } 4855 4856 /* Firmware commit */ 4857 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4858 fw_commit.fs = slot; 4859 fw_commit.ca = commit_action; 4860 4861 memset(status, 0, sizeof(*status)); 4862 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4863 status); 4864 if (res) { 4865 free(status); 4866 return res; 4867 } 4868 4869 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4870 4871 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4872 4873 if (!status->timed_out) { 4874 free(status); 4875 } 4876 4877 if (res) { 4878 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4879 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4880 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 4881 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4882 NVME_CTRLR_NOTICELOG(ctrlr, 4883 "firmware activation requires conventional reset to be performed. !\n"); 4884 } else { 4885 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4886 } 4887 return -ENXIO; 4888 } 4889 } 4890 4891 return spdk_nvme_ctrlr_reset(ctrlr); 4892 } 4893 4894 int 4895 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4896 { 4897 int rc, size; 4898 union spdk_nvme_cmbsz_register cmbsz; 4899 4900 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4901 4902 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4903 return -ENOTSUP; 4904 } 4905 4906 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4907 4908 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4909 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4910 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4911 4912 if (rc < 0) { 4913 return rc; 4914 } 4915 4916 return size; 4917 } 4918 4919 void * 4920 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4921 { 4922 void *buf; 4923 4924 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4925 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4926 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4927 4928 return buf; 4929 } 4930 4931 void 4932 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4933 { 4934 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4935 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4936 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4937 } 4938 4939 int 4940 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4941 { 4942 int rc; 4943 4944 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4945 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4946 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4947 4948 return rc; 4949 } 4950 4951 int 4952 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4953 { 4954 int rc; 4955 4956 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4957 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4958 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4959 4960 return rc; 4961 } 4962 4963 void * 4964 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4965 { 4966 void *buf; 4967 4968 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4969 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4970 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4971 4972 return buf; 4973 } 4974 4975 int 4976 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4977 { 4978 int rc; 4979 4980 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4981 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4982 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4983 4984 return rc; 4985 } 4986 4987 int 4988 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4989 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4990 { 4991 union spdk_nvme_bprsel_register bprsel; 4992 union spdk_nvme_bpinfo_register bpinfo; 4993 uint64_t bpmbl, bpmb_size; 4994 4995 if (ctrlr->cap.bits.bps == 0) { 4996 return -ENOTSUP; 4997 } 4998 4999 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 5000 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 5001 return -EIO; 5002 } 5003 5004 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 5005 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 5006 return -EALREADY; 5007 } 5008 5009 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5010 5011 bpmb_size = bprsz * 4096; 5012 bpmbl = spdk_vtophys(payload, &bpmb_size); 5013 if (bpmbl == SPDK_VTOPHYS_ERROR) { 5014 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 5015 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5016 return -EFAULT; 5017 } 5018 5019 if (bpmb_size != bprsz * 4096) { 5020 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n"); 5021 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5022 return -EFAULT; 5023 } 5024 5025 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 5026 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 5027 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5028 return -EIO; 5029 } 5030 5031 bprsel.bits.bpid = bpid; 5032 bprsel.bits.bprof = bprof; 5033 bprsel.bits.bprsz = bprsz; 5034 5035 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 5036 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 5037 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5038 return -EIO; 5039 } 5040 5041 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5042 return 0; 5043 } 5044 5045 int 5046 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 5047 { 5048 int rc = 0; 5049 union spdk_nvme_bpinfo_register bpinfo; 5050 5051 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 5052 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 5053 return -EIO; 5054 } 5055 5056 switch (bpinfo.bits.brs) { 5057 case SPDK_NVME_BRS_NO_READ: 5058 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 5059 rc = -EINVAL; 5060 break; 5061 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5062 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5063 rc = -EAGAIN; 5064 break; 5065 case SPDK_NVME_BRS_READ_ERROR: 5066 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5067 rc = -EIO; 5068 break; 5069 case SPDK_NVME_BRS_READ_SUCCESS: 5070 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5071 break; 5072 default: 5073 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5074 rc = -EINVAL; 5075 } 5076 5077 return rc; 5078 } 5079 5080 static void 5081 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5082 { 5083 int res; 5084 struct spdk_nvme_ctrlr *ctrlr = arg; 5085 struct spdk_nvme_fw_commit fw_commit; 5086 struct spdk_nvme_cpl err_cpl = 5087 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5088 5089 if (spdk_nvme_cpl_is_error(cpl)) { 5090 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5091 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5092 return; 5093 } 5094 5095 if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) { 5096 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5097 ctrlr->fw_payload = (uint8_t *)ctrlr->fw_payload + ctrlr->fw_transfer_size; 5098 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5099 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5100 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5101 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5102 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5103 if (res) { 5104 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5105 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5106 return; 5107 } 5108 5109 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5110 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5111 } 5112 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5113 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5114 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5115 fw_commit.bpid = ctrlr->bpid; 5116 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5117 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5118 nvme_write_boot_partition_cb, ctrlr); 5119 if (res) { 5120 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5121 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5122 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5123 return; 5124 } 5125 5126 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5127 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5128 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5129 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5130 fw_commit.bpid = ctrlr->bpid; 5131 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5132 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5133 nvme_write_boot_partition_cb, ctrlr); 5134 if (res) { 5135 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5136 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5137 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5138 return; 5139 } 5140 5141 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5142 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5143 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5144 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5145 } else { 5146 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5147 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5148 return; 5149 } 5150 } 5151 5152 int 5153 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5154 void *payload, uint32_t size, uint32_t bpid, 5155 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5156 { 5157 int res; 5158 5159 if (ctrlr->cap.bits.bps == 0) { 5160 return -ENOTSUP; 5161 } 5162 5163 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5164 ctrlr->bpid = bpid; 5165 ctrlr->bp_write_cb_fn = cb_fn; 5166 ctrlr->bp_write_cb_arg = cb_arg; 5167 ctrlr->fw_offset = 0; 5168 ctrlr->fw_size_remaining = size; 5169 ctrlr->fw_payload = payload; 5170 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5171 5172 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5173 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5174 5175 return res; 5176 } 5177 5178 bool 5179 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5180 { 5181 assert(ctrlr); 5182 5183 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5184 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5185 } 5186 5187 bool 5188 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5189 { 5190 assert(ctrlr); 5191 5192 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5193 } 5194 5195 int 5196 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5197 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5198 { 5199 struct nvme_completion_poll_status *status; 5200 int res; 5201 5202 status = calloc(1, sizeof(*status)); 5203 if (!status) { 5204 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5205 return -ENOMEM; 5206 } 5207 5208 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5209 nvme_completion_poll_cb, status); 5210 if (res) { 5211 free(status); 5212 return res; 5213 } 5214 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5215 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5216 if (!status->timed_out) { 5217 free(status); 5218 } 5219 return -ENXIO; 5220 } 5221 free(status); 5222 5223 return 0; 5224 } 5225 5226 int 5227 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5228 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5229 { 5230 struct nvme_completion_poll_status *status; 5231 int res; 5232 5233 status = calloc(1, sizeof(*status)); 5234 if (!status) { 5235 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5236 return -ENOMEM; 5237 } 5238 5239 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5240 nvme_completion_poll_cb, 5241 status); 5242 if (res) { 5243 free(status); 5244 return res; 5245 } 5246 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5247 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5248 if (!status->timed_out) { 5249 free(status); 5250 } 5251 return -ENXIO; 5252 } 5253 5254 free(status); 5255 5256 return 0; 5257 } 5258 5259 uint64_t 5260 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5261 { 5262 return ctrlr->flags; 5263 } 5264 5265 const struct spdk_nvme_transport_id * 5266 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5267 { 5268 return &ctrlr->trid; 5269 } 5270 5271 int32_t 5272 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5273 { 5274 uint32_t qid; 5275 5276 assert(ctrlr->free_io_qids); 5277 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5278 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5279 if (qid > ctrlr->opts.num_io_queues) { 5280 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5281 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5282 return -1; 5283 } 5284 5285 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5286 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5287 return qid; 5288 } 5289 5290 void 5291 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5292 { 5293 assert(qid <= ctrlr->opts.num_io_queues); 5294 5295 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5296 5297 if (spdk_likely(ctrlr->free_io_qids)) { 5298 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5299 } 5300 5301 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5302 } 5303 5304 int 5305 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5306 struct spdk_memory_domain **domains, int array_size) 5307 { 5308 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5309 } 5310