1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2015 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "nvme_internal.h" 10 #include "nvme_io_msg.h" 11 12 #include "spdk/env.h" 13 #include "spdk/string.h" 14 #include "spdk/endian.h" 15 16 struct nvme_active_ns_ctx; 17 18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 19 struct nvme_async_event_request *aer); 20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx); 21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); 22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns); 23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); 24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr); 25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 26 uint64_t timeout_in_ms); 27 28 static int 29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2) 30 { 31 if (ns1->id < ns2->id) { 32 return -1; 33 } else if (ns1->id > ns2->id) { 34 return 1; 35 } else { 36 return 0; 37 } 38 } 39 40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp); 41 42 #define CTRLR_STRING(ctrlr) \ 43 ((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \ 44 ctrlr->trid.subnqn : ctrlr->trid.traddr) 45 46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \ 47 SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 48 49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \ 50 SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 51 52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \ 53 SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 54 55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \ 56 SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 57 58 #ifdef DEBUG 59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \ 60 SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 61 #else 62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0) 63 #endif 64 65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \ 66 nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \ 67 offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg) 68 69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \ 70 nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \ 71 offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg) 72 73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \ 74 nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg) 75 76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \ 77 nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg) 78 79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \ 80 nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg) 81 82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \ 83 nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg) 84 85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \ 86 nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg) 87 88 static int 89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 90 { 91 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 92 &cc->raw); 93 } 94 95 static int 96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 97 { 98 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 99 &csts->raw); 100 } 101 102 int 103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 104 { 105 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 106 &cap->raw); 107 } 108 109 int 110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 111 { 112 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 113 &vs->raw); 114 } 115 116 int 117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz) 118 { 119 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 120 &cmbsz->raw); 121 } 122 123 int 124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap) 125 { 126 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 127 &pmrcap->raw); 128 } 129 130 int 131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo) 132 { 133 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw), 134 &bpinfo->raw); 135 } 136 137 int 138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel) 139 { 140 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw), 141 bprsel->raw); 142 } 143 144 int 145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value) 146 { 147 return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl), 148 bpmbl_value); 149 } 150 151 static int 152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value) 153 { 154 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr), 155 nssr_value); 156 } 157 158 bool 159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr) 160 { 161 return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS && 162 ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS; 163 } 164 165 /* When the field in spdk_nvme_ctrlr_opts are changed and you change this function, please 166 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c 167 */ 168 void 169 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 170 { 171 char host_id_str[SPDK_UUID_STRING_LEN]; 172 173 assert(opts); 174 175 opts->opts_size = opts_size; 176 177 #define FIELD_OK(field) \ 178 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 179 180 #define SET_FIELD(field, value) \ 181 if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \ 182 opts->field = value; \ 183 } \ 184 185 SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES); 186 SET_FIELD(use_cmb_sqs, false); 187 SET_FIELD(no_shn_notification, false); 188 SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR); 189 SET_FIELD(arbitration_burst, 0); 190 SET_FIELD(low_priority_weight, 0); 191 SET_FIELD(medium_priority_weight, 0); 192 SET_FIELD(high_priority_weight, 0); 193 SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS); 194 SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT); 195 SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE); 196 197 if (nvme_driver_init() == 0) { 198 if (FIELD_OK(hostnqn)) { 199 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 200 &g_spdk_nvme_driver->default_extended_host_id); 201 snprintf(opts->hostnqn, sizeof(opts->hostnqn), 202 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str); 203 } 204 205 if (FIELD_OK(extended_host_id)) { 206 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 207 sizeof(opts->extended_host_id)); 208 } 209 210 } 211 212 SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS); 213 214 if (FIELD_OK(src_addr)) { 215 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 216 } 217 218 if (FIELD_OK(src_svcid)) { 219 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 220 } 221 222 if (FIELD_OK(host_id)) { 223 memset(opts->host_id, 0, sizeof(opts->host_id)); 224 } 225 226 SET_FIELD(command_set, CHAR_BIT); 227 SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000); 228 SET_FIELD(header_digest, false); 229 SET_FIELD(data_digest, false); 230 SET_FIELD(disable_error_logging, false); 231 SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT); 232 SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE); 233 SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT); 234 SET_FIELD(disable_read_ana_log_page, false); 235 SET_FIELD(disable_read_changed_ns_list_log_page, false); 236 237 if (FIELD_OK(psk)) { 238 memset(opts->psk, 0, sizeof(opts->psk)); 239 } 240 241 #undef FIELD_OK 242 #undef SET_FIELD 243 } 244 245 const struct spdk_nvme_ctrlr_opts * 246 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) 247 { 248 return &ctrlr->opts; 249 } 250 251 /** 252 * This function will be called when the process allocates the IO qpair. 253 * Note: the ctrlr_lock must be held when calling this function. 254 */ 255 static void 256 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 257 { 258 struct spdk_nvme_ctrlr_process *active_proc; 259 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 260 261 active_proc = nvme_ctrlr_get_current_process(ctrlr); 262 if (active_proc) { 263 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 264 qpair->active_proc = active_proc; 265 } 266 } 267 268 /** 269 * This function will be called when the process frees the IO qpair. 270 * Note: the ctrlr_lock must be held when calling this function. 271 */ 272 static void 273 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 274 { 275 struct spdk_nvme_ctrlr_process *active_proc; 276 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 277 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 278 279 active_proc = nvme_ctrlr_get_current_process(ctrlr); 280 if (!active_proc) { 281 return; 282 } 283 284 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 285 per_process_tailq, tmp_qpair) { 286 if (active_qpair == qpair) { 287 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 288 active_qpair, per_process_tailq); 289 290 break; 291 } 292 } 293 } 294 295 void 296 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 297 struct spdk_nvme_io_qpair_opts *opts, 298 size_t opts_size) 299 { 300 assert(ctrlr); 301 302 assert(opts); 303 304 memset(opts, 0, opts_size); 305 306 #define FIELD_OK(field) \ 307 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 308 309 if (FIELD_OK(qprio)) { 310 opts->qprio = SPDK_NVME_QPRIO_URGENT; 311 } 312 313 if (FIELD_OK(io_queue_size)) { 314 opts->io_queue_size = ctrlr->opts.io_queue_size; 315 } 316 317 if (FIELD_OK(io_queue_requests)) { 318 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 319 } 320 321 if (FIELD_OK(delay_cmd_submit)) { 322 opts->delay_cmd_submit = false; 323 } 324 325 if (FIELD_OK(sq.vaddr)) { 326 opts->sq.vaddr = NULL; 327 } 328 329 if (FIELD_OK(sq.paddr)) { 330 opts->sq.paddr = 0; 331 } 332 333 if (FIELD_OK(sq.buffer_size)) { 334 opts->sq.buffer_size = 0; 335 } 336 337 if (FIELD_OK(cq.vaddr)) { 338 opts->cq.vaddr = NULL; 339 } 340 341 if (FIELD_OK(cq.paddr)) { 342 opts->cq.paddr = 0; 343 } 344 345 if (FIELD_OK(cq.buffer_size)) { 346 opts->cq.buffer_size = 0; 347 } 348 349 if (FIELD_OK(create_only)) { 350 opts->create_only = false; 351 } 352 353 if (FIELD_OK(async_mode)) { 354 opts->async_mode = false; 355 } 356 357 #undef FIELD_OK 358 } 359 360 static struct spdk_nvme_qpair * 361 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 362 const struct spdk_nvme_io_qpair_opts *opts) 363 { 364 int32_t qid; 365 struct spdk_nvme_qpair *qpair; 366 union spdk_nvme_cc_register cc; 367 368 if (!ctrlr) { 369 return NULL; 370 } 371 372 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 373 cc.raw = ctrlr->process_init_cc.raw; 374 375 if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) { 376 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 377 return NULL; 378 } 379 380 /* 381 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 382 * default round robin arbitration method. 383 */ 384 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) { 385 NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n"); 386 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 387 return NULL; 388 } 389 390 qid = spdk_nvme_ctrlr_alloc_qid(ctrlr); 391 if (qid < 0) { 392 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 393 return NULL; 394 } 395 396 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts); 397 if (qpair == NULL) { 398 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n"); 399 spdk_nvme_ctrlr_free_qid(ctrlr, qid); 400 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 401 return NULL; 402 } 403 404 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 405 406 nvme_ctrlr_proc_add_io_qpair(qpair); 407 408 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 409 410 return qpair; 411 } 412 413 int 414 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 415 { 416 int rc; 417 418 if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) { 419 return -EISCONN; 420 } 421 422 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 423 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 424 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 425 426 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 427 spdk_delay_us(100); 428 } 429 430 return rc; 431 } 432 433 void 434 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair) 435 { 436 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 437 438 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 439 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 440 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 441 } 442 443 struct spdk_nvme_qpair * 444 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 445 const struct spdk_nvme_io_qpair_opts *user_opts, 446 size_t opts_size) 447 { 448 449 struct spdk_nvme_qpair *qpair; 450 struct spdk_nvme_io_qpair_opts opts; 451 int rc; 452 453 if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) { 454 /* When controller is resetting or initializing, free_io_qids is deleted or not created yet. 455 * We can't create IO qpair in that case */ 456 return NULL; 457 } 458 459 /* 460 * Get the default options, then overwrite them with the user-provided options 461 * up to opts_size. 462 * 463 * This allows for extensions of the opts structure without breaking 464 * ABI compatibility. 465 */ 466 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 467 if (user_opts) { 468 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 469 470 /* If user passes buffers, make sure they're big enough for the requested queue size */ 471 if (opts.sq.vaddr) { 472 if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) { 473 NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n", 474 opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))); 475 return NULL; 476 } 477 } 478 if (opts.cq.vaddr) { 479 if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) { 480 NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n", 481 opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))); 482 return NULL; 483 } 484 } 485 } 486 487 qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts); 488 489 if (qpair == NULL || opts.create_only == true) { 490 return qpair; 491 } 492 493 rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair); 494 if (rc != 0) { 495 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n"); 496 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 497 nvme_ctrlr_proc_remove_io_qpair(qpair); 498 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 499 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 500 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 501 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 502 return NULL; 503 } 504 505 return qpair; 506 } 507 508 int 509 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) 510 { 511 struct spdk_nvme_ctrlr *ctrlr; 512 enum nvme_qpair_state qpair_state; 513 int rc; 514 515 assert(qpair != NULL); 516 assert(nvme_qpair_is_admin_queue(qpair) == false); 517 assert(qpair->ctrlr != NULL); 518 519 ctrlr = qpair->ctrlr; 520 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 521 qpair_state = nvme_qpair_get_state(qpair); 522 523 if (ctrlr->is_removed) { 524 rc = -ENODEV; 525 goto out; 526 } 527 528 if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) { 529 rc = -EAGAIN; 530 goto out; 531 } 532 533 if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) { 534 rc = -ENXIO; 535 goto out; 536 } 537 538 if (qpair_state != NVME_QPAIR_DISCONNECTED) { 539 rc = 0; 540 goto out; 541 } 542 543 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 544 if (rc) { 545 rc = -EAGAIN; 546 goto out; 547 } 548 549 out: 550 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 551 return rc; 552 } 553 554 spdk_nvme_qp_failure_reason 555 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr) 556 { 557 return ctrlr->adminq->transport_failure_reason; 558 } 559 560 /* 561 * This internal function will attempt to take the controller 562 * lock before calling disconnect on a controller qpair. 563 * Functions already holding the controller lock should 564 * call nvme_transport_ctrlr_disconnect_qpair directly. 565 */ 566 void 567 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) 568 { 569 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 570 571 assert(ctrlr != NULL); 572 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 573 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 574 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 575 } 576 577 int 578 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 579 { 580 struct spdk_nvme_ctrlr *ctrlr; 581 582 if (qpair == NULL) { 583 return 0; 584 } 585 586 ctrlr = qpair->ctrlr; 587 588 if (qpair->in_completion_context) { 589 /* 590 * There are many cases where it is convenient to delete an io qpair in the context 591 * of that qpair's completion routine. To handle this properly, set a flag here 592 * so that the completion routine will perform an actual delete after the context 593 * unwinds. 594 */ 595 qpair->delete_after_completion_context = 1; 596 return 0; 597 } 598 599 qpair->destroy_in_progress = 1; 600 601 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 602 603 if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) { 604 spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair); 605 } 606 607 /* Do not retry. */ 608 nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING); 609 610 /* In the multi-process case, a process may call this function on a foreign 611 * I/O qpair (i.e. one that this process did not create) when that qpairs process 612 * exits unexpectedly. In that case, we must not try to abort any reqs associated 613 * with that qpair, since the callbacks will also be foreign to this process. 614 */ 615 if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 616 nvme_qpair_abort_all_queued_reqs(qpair); 617 } 618 619 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 620 621 nvme_ctrlr_proc_remove_io_qpair(qpair); 622 623 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 624 spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id); 625 626 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 627 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 628 return 0; 629 } 630 631 static void 632 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 633 struct spdk_nvme_intel_log_page_directory *log_page_directory) 634 { 635 if (log_page_directory == NULL) { 636 return; 637 } 638 639 assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL); 640 641 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 642 643 if (log_page_directory->read_latency_log_len || 644 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 645 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 646 } 647 if (log_page_directory->write_latency_log_len || 648 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 649 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 650 } 651 if (log_page_directory->temperature_statistics_log_len) { 652 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 653 } 654 if (log_page_directory->smart_log_len) { 655 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 656 } 657 if (log_page_directory->marketing_description_log_len) { 658 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 659 } 660 } 661 662 struct intel_log_pages_ctx { 663 struct spdk_nvme_intel_log_page_directory log_page_directory; 664 struct spdk_nvme_ctrlr *ctrlr; 665 }; 666 667 static void 668 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl) 669 { 670 struct intel_log_pages_ctx *ctx = arg; 671 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 672 673 if (!spdk_nvme_cpl_is_error(cpl)) { 674 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory); 675 } 676 677 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 678 ctrlr->opts.admin_timeout_ms); 679 free(ctx); 680 } 681 682 static int 683 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 684 { 685 int rc = 0; 686 struct intel_log_pages_ctx *ctx; 687 688 ctx = calloc(1, sizeof(*ctx)); 689 if (!ctx) { 690 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 691 ctrlr->opts.admin_timeout_ms); 692 return 0; 693 } 694 695 ctx->ctrlr = ctrlr; 696 697 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, 698 SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory, 699 sizeof(struct spdk_nvme_intel_log_page_directory), 700 0, nvme_ctrlr_set_intel_support_log_pages_done, ctx); 701 if (rc != 0) { 702 free(ctx); 703 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 704 ctrlr->opts.admin_timeout_ms); 705 return 0; 706 } 707 708 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES, 709 ctrlr->opts.admin_timeout_ms); 710 711 return 0; 712 } 713 714 static int 715 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 716 { 717 uint32_t ana_log_page_size; 718 719 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid * 720 sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count * 721 sizeof(uint32_t); 722 723 /* Number of active namespaces may have changed. 724 * Check if ANA log page fits into existing buffer. 725 */ 726 if (ana_log_page_size > ctrlr->ana_log_page_size) { 727 void *new_buffer; 728 729 if (ctrlr->ana_log_page) { 730 new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size); 731 } else { 732 new_buffer = calloc(1, ana_log_page_size); 733 } 734 735 if (!new_buffer) { 736 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n", 737 ana_log_page_size); 738 return -ENXIO; 739 } 740 741 ctrlr->ana_log_page = new_buffer; 742 if (ctrlr->copied_ana_desc) { 743 new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size); 744 } else { 745 new_buffer = calloc(1, ana_log_page_size); 746 } 747 748 if (!new_buffer) { 749 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n", 750 ana_log_page_size); 751 return -ENOMEM; 752 } 753 754 ctrlr->copied_ana_desc = new_buffer; 755 ctrlr->ana_log_page_size = ana_log_page_size; 756 } 757 758 return 0; 759 } 760 761 static int 762 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 763 { 764 struct nvme_completion_poll_status *status; 765 int rc; 766 767 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 768 if (rc != 0) { 769 return rc; 770 } 771 772 status = calloc(1, sizeof(*status)); 773 if (status == NULL) { 774 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 775 return -ENOMEM; 776 } 777 778 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 779 SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page, 780 ctrlr->ana_log_page_size, 0, 781 nvme_completion_poll_cb, status); 782 if (rc != 0) { 783 free(status); 784 return rc; 785 } 786 787 if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock, 788 ctrlr->opts.admin_timeout_ms * 1000)) { 789 if (!status->timed_out) { 790 free(status); 791 } 792 return -EIO; 793 } 794 795 free(status); 796 return 0; 797 } 798 799 static int 800 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 801 void *cb_arg) 802 { 803 struct spdk_nvme_ctrlr *ctrlr = cb_arg; 804 struct spdk_nvme_ns *ns; 805 uint32_t i, nsid; 806 807 for (i = 0; i < desc->num_of_nsid; i++) { 808 nsid = desc->nsid[i]; 809 if (nsid == 0 || nsid > ctrlr->cdata.nn) { 810 continue; 811 } 812 813 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 814 assert(ns != NULL); 815 816 ns->ana_group_id = desc->ana_group_id; 817 ns->ana_state = desc->ana_state; 818 } 819 820 return 0; 821 } 822 823 int 824 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, 825 spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg) 826 { 827 struct spdk_nvme_ana_group_descriptor *copied_desc; 828 uint8_t *orig_desc; 829 uint32_t i, desc_size, copy_len; 830 int rc = 0; 831 832 if (ctrlr->ana_log_page == NULL) { 833 return -EINVAL; 834 } 835 836 copied_desc = ctrlr->copied_ana_desc; 837 838 orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page); 839 copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page); 840 841 for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) { 842 memcpy(copied_desc, orig_desc, copy_len); 843 844 rc = cb_fn(copied_desc, cb_arg); 845 if (rc != 0) { 846 break; 847 } 848 849 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) + 850 copied_desc->num_of_nsid * sizeof(uint32_t); 851 orig_desc += desc_size; 852 copy_len -= desc_size; 853 } 854 855 return rc; 856 } 857 858 static int 859 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 860 { 861 int rc = 0; 862 863 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 864 /* Mandatory pages */ 865 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 866 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 867 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 868 if (ctrlr->cdata.lpa.celp) { 869 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 870 } 871 872 if (ctrlr->cdata.cmic.ana_reporting) { 873 ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true; 874 if (!ctrlr->opts.disable_read_ana_log_page) { 875 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 876 if (rc == 0) { 877 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 878 ctrlr); 879 } 880 } 881 } 882 883 if (ctrlr->cdata.ctratt.fdps) { 884 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_CONFIGURATIONS] = true; 885 ctrlr->log_page_supported[SPDK_NVME_LOG_RECLAIM_UNIT_HANDLE_USAGE] = true; 886 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_STATISTICS] = true; 887 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_EVENTS] = true; 888 } 889 890 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && 891 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE && 892 !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { 893 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES, 894 ctrlr->opts.admin_timeout_ms); 895 896 } else { 897 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 898 ctrlr->opts.admin_timeout_ms); 899 900 } 901 902 return rc; 903 } 904 905 static void 906 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 907 { 908 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 909 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 910 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 911 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 912 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 913 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 914 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 915 } 916 917 static void 918 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr) 919 { 920 uint32_t cdw11; 921 struct nvme_completion_poll_status *status; 922 923 if (ctrlr->opts.arbitration_burst == 0) { 924 return; 925 } 926 927 if (ctrlr->opts.arbitration_burst > 7) { 928 NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values is from 0-7\n"); 929 return; 930 } 931 932 status = calloc(1, sizeof(*status)); 933 if (!status) { 934 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 935 return; 936 } 937 938 cdw11 = ctrlr->opts.arbitration_burst; 939 940 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) { 941 cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8; 942 cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16; 943 cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24; 944 } 945 946 if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION, 947 cdw11, 0, NULL, 0, 948 nvme_completion_poll_cb, status) < 0) { 949 NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n"); 950 free(status); 951 return; 952 } 953 954 if (nvme_wait_for_completion_timeout(ctrlr->adminq, status, 955 ctrlr->opts.admin_timeout_ms * 1000)) { 956 NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n"); 957 } 958 959 if (!status->timed_out) { 960 free(status); 961 } 962 } 963 964 static void 965 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 966 { 967 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 968 /* Mandatory features */ 969 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 970 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 971 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 972 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 973 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 974 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 975 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 976 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 977 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 978 /* Optional features */ 979 if (ctrlr->cdata.vwc.present) { 980 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 981 } 982 if (ctrlr->cdata.apsta.supported) { 983 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 984 } 985 if (ctrlr->cdata.hmpre) { 986 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 987 } 988 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 989 nvme_ctrlr_set_intel_supported_features(ctrlr); 990 } 991 992 nvme_ctrlr_set_arbitration_feature(ctrlr); 993 } 994 995 bool 996 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr) 997 { 998 return ctrlr->is_failed; 999 } 1000 1001 void 1002 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 1003 { 1004 /* 1005 * Set the flag here and leave the work failure of qpairs to 1006 * spdk_nvme_qpair_process_completions(). 1007 */ 1008 if (hot_remove) { 1009 ctrlr->is_removed = true; 1010 } 1011 1012 if (ctrlr->is_failed) { 1013 NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n"); 1014 return; 1015 } 1016 1017 if (ctrlr->is_disconnecting) { 1018 NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n"); 1019 return; 1020 } 1021 1022 ctrlr->is_failed = true; 1023 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1024 NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n"); 1025 } 1026 1027 /** 1028 * This public API function will try to take the controller lock. 1029 * Any private functions being called from a thread already holding 1030 * the ctrlr lock should call nvme_ctrlr_fail directly. 1031 */ 1032 void 1033 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) 1034 { 1035 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1036 nvme_ctrlr_fail(ctrlr, false); 1037 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1038 } 1039 1040 static void 1041 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1042 { 1043 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1044 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1045 1046 if (spdk_nvme_cpl_is_error(cpl)) { 1047 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1048 ctx->shutdown_complete = true; 1049 return; 1050 } 1051 1052 if (ctrlr->opts.no_shn_notification) { 1053 ctx->shutdown_complete = true; 1054 return; 1055 } 1056 1057 /* 1058 * The NVMe specification defines RTD3E to be the time between 1059 * setting SHN = 1 until the controller will set SHST = 10b. 1060 * If the device doesn't report RTD3 entry latency, or if it 1061 * reports RTD3 entry latency less than 10 seconds, pick 1062 * 10 seconds as a reasonable amount of time to 1063 * wait before proceeding. 1064 */ 1065 NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 1066 ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); 1067 ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); 1068 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms); 1069 1070 ctx->shutdown_start_tsc = spdk_get_ticks(); 1071 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1072 } 1073 1074 static void 1075 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1076 { 1077 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1078 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1079 union spdk_nvme_cc_register cc; 1080 int rc; 1081 1082 if (spdk_nvme_cpl_is_error(cpl)) { 1083 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1084 ctx->shutdown_complete = true; 1085 return; 1086 } 1087 1088 assert(value <= UINT32_MAX); 1089 cc.raw = (uint32_t)value; 1090 1091 if (ctrlr->opts.no_shn_notification) { 1092 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 1093 if (cc.bits.en == 0) { 1094 ctx->shutdown_complete = true; 1095 return; 1096 } 1097 1098 cc.bits.en = 0; 1099 } else { 1100 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 1101 } 1102 1103 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx); 1104 if (rc != 0) { 1105 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1106 ctx->shutdown_complete = true; 1107 } 1108 } 1109 1110 static void 1111 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, 1112 struct nvme_ctrlr_detach_ctx *ctx) 1113 { 1114 int rc; 1115 1116 if (ctrlr->is_removed) { 1117 ctx->shutdown_complete = true; 1118 return; 1119 } 1120 1121 if (ctrlr->adminq == NULL || 1122 ctrlr->adminq->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE) { 1123 NVME_CTRLR_INFOLOG(ctrlr, "Adminq is not connected.\n"); 1124 ctx->shutdown_complete = true; 1125 return; 1126 } 1127 1128 ctx->state = NVME_CTRLR_DETACH_SET_CC; 1129 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx); 1130 if (rc != 0) { 1131 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1132 ctx->shutdown_complete = true; 1133 } 1134 } 1135 1136 static void 1137 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1138 { 1139 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1140 1141 if (spdk_nvme_cpl_is_error(cpl)) { 1142 NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n"); 1143 ctx->shutdown_complete = true; 1144 return; 1145 } 1146 1147 assert(value <= UINT32_MAX); 1148 ctx->csts.raw = (uint32_t)value; 1149 ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE; 1150 } 1151 1152 static int 1153 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, 1154 struct nvme_ctrlr_detach_ctx *ctx) 1155 { 1156 union spdk_nvme_csts_register csts; 1157 uint32_t ms_waited; 1158 1159 switch (ctx->state) { 1160 case NVME_CTRLR_DETACH_SET_CC: 1161 case NVME_CTRLR_DETACH_GET_CSTS: 1162 /* We're still waiting for the register operation to complete */ 1163 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1164 return -EAGAIN; 1165 1166 case NVME_CTRLR_DETACH_CHECK_CSTS: 1167 ctx->state = NVME_CTRLR_DETACH_GET_CSTS; 1168 if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) { 1169 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 1170 return -EIO; 1171 } 1172 return -EAGAIN; 1173 1174 case NVME_CTRLR_DETACH_GET_CSTS_DONE: 1175 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1176 break; 1177 1178 default: 1179 assert(0 && "Should never happen"); 1180 return -EINVAL; 1181 } 1182 1183 ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); 1184 csts.raw = ctx->csts.raw; 1185 1186 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 1187 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited); 1188 return 0; 1189 } 1190 1191 if (ms_waited < ctx->shutdown_timeout_ms) { 1192 return -EAGAIN; 1193 } 1194 1195 NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n", 1196 ctx->shutdown_timeout_ms); 1197 if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { 1198 NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); 1199 } 1200 1201 return 0; 1202 } 1203 1204 static inline uint64_t 1205 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr) 1206 { 1207 return ctrlr->cap.bits.to * 500; 1208 } 1209 1210 static void 1211 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1212 { 1213 struct spdk_nvme_ctrlr *ctrlr = ctx; 1214 1215 if (spdk_nvme_cpl_is_error(cpl)) { 1216 NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n"); 1217 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1218 return; 1219 } 1220 1221 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 1222 nvme_ctrlr_get_ready_timeout(ctrlr)); 1223 } 1224 1225 static int 1226 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 1227 { 1228 union spdk_nvme_cc_register cc; 1229 int rc; 1230 1231 rc = nvme_transport_ctrlr_enable(ctrlr); 1232 if (rc != 0) { 1233 NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n"); 1234 return rc; 1235 } 1236 1237 cc.raw = ctrlr->process_init_cc.raw; 1238 if (cc.bits.en != 0) { 1239 NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n"); 1240 return -EINVAL; 1241 } 1242 1243 cc.bits.en = 1; 1244 cc.bits.css = 0; 1245 cc.bits.shn = 0; 1246 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 1247 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 1248 1249 /* Page size is 2 ^ (12 + mps). */ 1250 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 1251 1252 /* 1253 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS. 1254 * A controller that does not have any bit set in CAP.CSS is not spec compliant. 1255 * Try to support such a controller regardless. 1256 */ 1257 if (ctrlr->cap.bits.css == 0) { 1258 NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n"); 1259 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 1260 } 1261 1262 /* 1263 * If the user did not explicitly request a command set, or supplied a value larger than 1264 * what can be saved in CC.CSS, use the most reasonable default. 1265 */ 1266 if (ctrlr->opts.command_set >= CHAR_BIT) { 1267 if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) { 1268 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS; 1269 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) { 1270 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1271 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) { 1272 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO; 1273 } else { 1274 /* Invalid supported bits detected, falling back to NVM. */ 1275 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1276 } 1277 } 1278 1279 /* Verify that the selected command set is supported by the controller. */ 1280 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 1281 NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n", 1282 ctrlr->opts.command_set, ctrlr->cap.bits.css); 1283 NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n"); 1284 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1285 } 1286 1287 cc.bits.css = ctrlr->opts.command_set; 1288 1289 switch (ctrlr->opts.arb_mechanism) { 1290 case SPDK_NVME_CC_AMS_RR: 1291 break; 1292 case SPDK_NVME_CC_AMS_WRR: 1293 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 1294 break; 1295 } 1296 return -EINVAL; 1297 case SPDK_NVME_CC_AMS_VS: 1298 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 1299 break; 1300 } 1301 return -EINVAL; 1302 default: 1303 return -EINVAL; 1304 } 1305 1306 cc.bits.ams = ctrlr->opts.arb_mechanism; 1307 ctrlr->process_init_cc.raw = cc.raw; 1308 1309 if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) { 1310 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 1311 return -EIO; 1312 } 1313 1314 return 0; 1315 } 1316 1317 static const char * 1318 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 1319 { 1320 switch (state) { 1321 case NVME_CTRLR_STATE_INIT_DELAY: 1322 return "delay init"; 1323 case NVME_CTRLR_STATE_CONNECT_ADMINQ: 1324 return "connect adminq"; 1325 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 1326 return "wait for connect adminq"; 1327 case NVME_CTRLR_STATE_READ_VS: 1328 return "read vs"; 1329 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 1330 return "read vs wait for vs"; 1331 case NVME_CTRLR_STATE_READ_CAP: 1332 return "read cap"; 1333 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 1334 return "read cap wait for cap"; 1335 case NVME_CTRLR_STATE_CHECK_EN: 1336 return "check en"; 1337 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 1338 return "check en wait for cc"; 1339 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1340 return "disable and wait for CSTS.RDY = 1"; 1341 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1342 return "disable and wait for CSTS.RDY = 1 reg"; 1343 case NVME_CTRLR_STATE_SET_EN_0: 1344 return "set CC.EN = 0"; 1345 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 1346 return "set CC.EN = 0 wait for cc"; 1347 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1348 return "disable and wait for CSTS.RDY = 0"; 1349 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 1350 return "disable and wait for CSTS.RDY = 0 reg"; 1351 case NVME_CTRLR_STATE_DISABLED: 1352 return "controller is disabled"; 1353 case NVME_CTRLR_STATE_ENABLE: 1354 return "enable controller by writing CC.EN = 1"; 1355 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 1356 return "enable controller by writing CC.EN = 1 reg"; 1357 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1358 return "wait for CSTS.RDY = 1"; 1359 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1360 return "wait for CSTS.RDY = 1 reg"; 1361 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 1362 return "reset admin queue"; 1363 case NVME_CTRLR_STATE_IDENTIFY: 1364 return "identify controller"; 1365 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 1366 return "wait for identify controller"; 1367 case NVME_CTRLR_STATE_CONFIGURE_AER: 1368 return "configure AER"; 1369 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 1370 return "wait for configure aer"; 1371 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 1372 return "set keep alive timeout"; 1373 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 1374 return "wait for set keep alive timeout"; 1375 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 1376 return "identify controller iocs specific"; 1377 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 1378 return "wait for identify controller iocs specific"; 1379 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 1380 return "get zns cmd and effects log page"; 1381 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 1382 return "wait for get zns cmd and effects log page"; 1383 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 1384 return "set number of queues"; 1385 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 1386 return "wait for set number of queues"; 1387 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 1388 return "identify active ns"; 1389 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 1390 return "wait for identify active ns"; 1391 case NVME_CTRLR_STATE_IDENTIFY_NS: 1392 return "identify ns"; 1393 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 1394 return "wait for identify ns"; 1395 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 1396 return "identify namespace id descriptors"; 1397 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 1398 return "wait for identify namespace id descriptors"; 1399 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 1400 return "identify ns iocs specific"; 1401 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 1402 return "wait for identify ns iocs specific"; 1403 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 1404 return "set supported log pages"; 1405 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 1406 return "set supported INTEL log pages"; 1407 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 1408 return "wait for supported INTEL log pages"; 1409 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 1410 return "set supported features"; 1411 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 1412 return "set doorbell buffer config"; 1413 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 1414 return "wait for doorbell buffer config"; 1415 case NVME_CTRLR_STATE_SET_HOST_ID: 1416 return "set host ID"; 1417 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 1418 return "wait for set host ID"; 1419 case NVME_CTRLR_STATE_TRANSPORT_READY: 1420 return "transport ready"; 1421 case NVME_CTRLR_STATE_READY: 1422 return "ready"; 1423 case NVME_CTRLR_STATE_ERROR: 1424 return "error"; 1425 } 1426 return "unknown"; 1427 }; 1428 1429 static void 1430 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1431 uint64_t timeout_in_ms, bool quiet) 1432 { 1433 uint64_t ticks_per_ms, timeout_in_ticks, now_ticks; 1434 1435 ctrlr->state = state; 1436 if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) { 1437 if (!quiet) { 1438 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n", 1439 nvme_ctrlr_state_string(ctrlr->state)); 1440 } 1441 return; 1442 } 1443 1444 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 1445 goto inf; 1446 } 1447 1448 ticks_per_ms = spdk_get_ticks_hz() / 1000; 1449 if (timeout_in_ms > UINT64_MAX / ticks_per_ms) { 1450 NVME_CTRLR_ERRLOG(ctrlr, 1451 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1452 goto inf; 1453 } 1454 1455 now_ticks = spdk_get_ticks(); 1456 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 1457 if (timeout_in_ticks > UINT64_MAX - now_ticks) { 1458 NVME_CTRLR_ERRLOG(ctrlr, 1459 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1460 goto inf; 1461 } 1462 1463 ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; 1464 if (!quiet) { 1465 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n", 1466 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 1467 } 1468 return; 1469 inf: 1470 if (!quiet) { 1471 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n", 1472 nvme_ctrlr_state_string(ctrlr->state)); 1473 } 1474 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 1475 } 1476 1477 static void 1478 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1479 uint64_t timeout_in_ms) 1480 { 1481 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false); 1482 } 1483 1484 static void 1485 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1486 uint64_t timeout_in_ms) 1487 { 1488 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true); 1489 } 1490 1491 static void 1492 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1493 { 1494 spdk_free(ctrlr->cdata_zns); 1495 ctrlr->cdata_zns = NULL; 1496 } 1497 1498 static void 1499 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1500 { 1501 nvme_ctrlr_free_zns_specific_data(ctrlr); 1502 } 1503 1504 static void 1505 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 1506 { 1507 if (ctrlr->shadow_doorbell) { 1508 spdk_free(ctrlr->shadow_doorbell); 1509 ctrlr->shadow_doorbell = NULL; 1510 } 1511 1512 if (ctrlr->eventidx) { 1513 spdk_free(ctrlr->eventidx); 1514 ctrlr->eventidx = NULL; 1515 } 1516 } 1517 1518 static void 1519 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) 1520 { 1521 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1522 1523 if (spdk_nvme_cpl_is_error(cpl)) { 1524 NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n"); 1525 } else { 1526 NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n"); 1527 } 1528 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1529 ctrlr->opts.admin_timeout_ms); 1530 } 1531 1532 static int 1533 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 1534 { 1535 int rc = 0; 1536 uint64_t prp1, prp2, len; 1537 1538 if (!ctrlr->cdata.oacs.doorbell_buffer_config) { 1539 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1540 ctrlr->opts.admin_timeout_ms); 1541 return 0; 1542 } 1543 1544 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 1545 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1546 ctrlr->opts.admin_timeout_ms); 1547 return 0; 1548 } 1549 1550 /* only 1 page size for doorbell buffer */ 1551 ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1552 NULL, SPDK_ENV_LCORE_ID_ANY, 1553 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1554 if (ctrlr->shadow_doorbell == NULL) { 1555 rc = -ENOMEM; 1556 goto error; 1557 } 1558 1559 len = ctrlr->page_size; 1560 prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len); 1561 if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1562 rc = -EFAULT; 1563 goto error; 1564 } 1565 1566 ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1567 NULL, SPDK_ENV_LCORE_ID_ANY, 1568 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1569 if (ctrlr->eventidx == NULL) { 1570 rc = -ENOMEM; 1571 goto error; 1572 } 1573 1574 len = ctrlr->page_size; 1575 prp2 = spdk_vtophys(ctrlr->eventidx, &len); 1576 if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1577 rc = -EFAULT; 1578 goto error; 1579 } 1580 1581 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, 1582 ctrlr->opts.admin_timeout_ms); 1583 1584 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 1585 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); 1586 if (rc != 0) { 1587 goto error; 1588 } 1589 1590 return 0; 1591 1592 error: 1593 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1594 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1595 return rc; 1596 } 1597 1598 void 1599 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) 1600 { 1601 struct nvme_request *req, *tmp; 1602 struct spdk_nvme_cpl cpl = {}; 1603 1604 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1605 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1606 1607 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 1608 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 1609 ctrlr->outstanding_aborts++; 1610 1611 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1612 nvme_free_request(req); 1613 } 1614 } 1615 1616 static int 1617 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1618 { 1619 if (ctrlr->is_resetting || ctrlr->is_removed) { 1620 /* 1621 * Controller is already resetting or has been removed. Return 1622 * immediately since there is no need to kick off another 1623 * reset in these cases. 1624 */ 1625 return ctrlr->is_resetting ? -EBUSY : -ENXIO; 1626 } 1627 1628 ctrlr->is_resetting = true; 1629 ctrlr->is_failed = false; 1630 ctrlr->is_disconnecting = true; 1631 ctrlr->prepare_for_reset = true; 1632 1633 NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n"); 1634 1635 /* Disable keep-alive, it'll be re-enabled as part of the init process */ 1636 ctrlr->keep_alive_interval_ticks = 0; 1637 1638 /* Abort all of the queued abort requests */ 1639 nvme_ctrlr_abort_queued_aborts(ctrlr); 1640 1641 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 1642 1643 ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1644 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1645 1646 return 0; 1647 } 1648 1649 static void 1650 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr) 1651 { 1652 assert(ctrlr->is_failed == false); 1653 ctrlr->is_disconnecting = false; 1654 1655 /* Doorbell buffer config is invalid during reset */ 1656 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1657 1658 /* I/O Command Set Specific Identify Controller data is invalidated during reset */ 1659 nvme_ctrlr_free_iocs_specific_data(ctrlr); 1660 1661 spdk_bit_array_free(&ctrlr->free_io_qids); 1662 } 1663 1664 int 1665 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1666 { 1667 int rc; 1668 1669 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1670 rc = nvme_ctrlr_disconnect(ctrlr); 1671 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1672 1673 return rc; 1674 } 1675 1676 void 1677 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) 1678 { 1679 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1680 1681 ctrlr->prepare_for_reset = false; 1682 1683 /* Set the state back to INIT to cause a full hardware reset. */ 1684 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1685 1686 /* Return without releasing ctrlr_lock. ctrlr_lock will be released when 1687 * spdk_nvme_ctrlr_reset_poll_async() returns 0. 1688 */ 1689 } 1690 1691 /** 1692 * This function will be called when the controller is being reinitialized. 1693 * Note: the ctrlr_lock must be held when calling this function. 1694 */ 1695 int 1696 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) 1697 { 1698 struct spdk_nvme_ns *ns, *tmp_ns; 1699 struct spdk_nvme_qpair *qpair; 1700 int rc = 0, rc_tmp = 0; 1701 bool async; 1702 1703 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1704 NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n"); 1705 rc = -1; 1706 } 1707 if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) { 1708 return -EAGAIN; 1709 } 1710 1711 /* 1712 * For non-fabrics controllers, the memory locations of the transport qpair 1713 * don't change when the controller is reset. They simply need to be 1714 * re-enabled with admin commands to the controller. For fabric 1715 * controllers we need to disconnect and reconnect the qpair on its 1716 * own thread outside of the context of the reset. 1717 */ 1718 if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 1719 /* Reinitialize qpairs */ 1720 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1721 assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id)); 1722 spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id); 1723 1724 /* Force a synchronous connect. We can't currently handle an asynchronous 1725 * operation here. */ 1726 async = qpair->async; 1727 qpair->async = false; 1728 rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 1729 qpair->async = async; 1730 1731 if (rc_tmp != 0) { 1732 rc = rc_tmp; 1733 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1734 continue; 1735 } 1736 } 1737 } 1738 1739 /* 1740 * Take this opportunity to remove inactive namespaces. During a reset namespace 1741 * handles can be invalidated. 1742 */ 1743 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 1744 if (!ns->active) { 1745 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 1746 spdk_free(ns); 1747 } 1748 } 1749 1750 if (rc) { 1751 nvme_ctrlr_fail(ctrlr, false); 1752 } 1753 ctrlr->is_resetting = false; 1754 1755 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1756 1757 if (!ctrlr->cdata.oaes.ns_attribute_notices) { 1758 /* 1759 * If controller doesn't support ns_attribute_notices and 1760 * namespace attributes change (e.g. number of namespaces) 1761 * we need to update system handling device reset. 1762 */ 1763 nvme_io_msg_ctrlr_update(ctrlr); 1764 } 1765 1766 return rc; 1767 } 1768 1769 /* 1770 * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset 1771 * (Change CC.EN from 1 to 0) as a operation to disconnect the admin qpair. 1772 * The following two functions are added to do a Controller Level Reset. They have 1773 * to be called under the nvme controller's lock. 1774 */ 1775 void 1776 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) 1777 { 1778 assert(ctrlr->is_disconnecting == true); 1779 1780 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 1781 } 1782 1783 int 1784 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr) 1785 { 1786 int rc = 0; 1787 1788 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1789 NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n"); 1790 rc = -1; 1791 } 1792 1793 if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) { 1794 return -EAGAIN; 1795 } 1796 1797 return rc; 1798 } 1799 1800 static void 1801 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr) 1802 { 1803 struct spdk_nvme_qpair *qpair; 1804 1805 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1806 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1807 } 1808 } 1809 1810 int 1811 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1812 { 1813 int rc; 1814 1815 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1816 1817 rc = nvme_ctrlr_disconnect(ctrlr); 1818 if (rc == 0) { 1819 nvme_ctrlr_fail_io_qpairs(ctrlr); 1820 } 1821 1822 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1823 1824 if (rc != 0) { 1825 if (rc == -EBUSY) { 1826 rc = 0; 1827 } 1828 return rc; 1829 } 1830 1831 while (1) { 1832 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1833 if (rc == -ENXIO) { 1834 break; 1835 } 1836 } 1837 1838 spdk_nvme_ctrlr_reconnect_async(ctrlr); 1839 1840 while (true) { 1841 rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1842 if (rc != -EAGAIN) { 1843 break; 1844 } 1845 } 1846 1847 return rc; 1848 } 1849 1850 int 1851 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1852 { 1853 union spdk_nvme_cap_register cap; 1854 int rc = 0; 1855 1856 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1857 if (cap.bits.nssrs == 0) { 1858 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1859 return -ENOTSUP; 1860 } 1861 1862 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1863 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1864 ctrlr->is_resetting = true; 1865 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1866 ctrlr->is_resetting = false; 1867 1868 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1869 /* 1870 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1871 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 1872 */ 1873 return rc; 1874 } 1875 1876 int 1877 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1878 { 1879 int rc = 0; 1880 1881 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1882 1883 if (ctrlr->is_failed == false) { 1884 rc = -EPERM; 1885 goto out; 1886 } 1887 1888 if (trid->trtype != ctrlr->trid.trtype) { 1889 rc = -EINVAL; 1890 goto out; 1891 } 1892 1893 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1894 rc = -EINVAL; 1895 goto out; 1896 } 1897 1898 ctrlr->trid = *trid; 1899 1900 out: 1901 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1902 return rc; 1903 } 1904 1905 void 1906 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1907 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1908 { 1909 if (!spdk_process_is_primary()) { 1910 return; 1911 } 1912 1913 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1914 ctrlr->remove_cb = remove_cb; 1915 ctrlr->cb_ctx = remove_ctx; 1916 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1917 } 1918 1919 static void 1920 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1921 { 1922 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1923 1924 if (spdk_nvme_cpl_is_error(cpl)) { 1925 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1926 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1927 return; 1928 } 1929 1930 /* 1931 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1932 * controller supports. 1933 */ 1934 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1935 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1936 if (ctrlr->cdata.mdts > 0) { 1937 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1938 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1939 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1940 } 1941 1942 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1943 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1944 ctrlr->cntlid = ctrlr->cdata.cntlid; 1945 } else { 1946 /* 1947 * Fabrics controllers should already have CNTLID from the Connect command. 1948 * 1949 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1950 * trust the one from Connect. 1951 */ 1952 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1953 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1954 ctrlr->cdata.cntlid, ctrlr->cntlid); 1955 } 1956 } 1957 1958 if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1959 assert(ctrlr->cdata.sgls.supported != 0x3); 1960 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1961 if (ctrlr->cdata.sgls.supported == 0x2) { 1962 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1963 } 1964 1965 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1966 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1967 } 1968 1969 if (ctrlr->cdata.sgls.metadata_address && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1970 ctrlr->flags |= SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED; 1971 } 1972 1973 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1974 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1975 } 1976 1977 if (ctrlr->cdata.oacs.directives) { 1978 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 1979 } 1980 1981 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 1982 ctrlr->cdata.fuses.compare_and_write); 1983 if (ctrlr->cdata.fuses.compare_and_write) { 1984 ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 1985 } 1986 1987 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 1988 ctrlr->opts.admin_timeout_ms); 1989 } 1990 1991 static int 1992 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 1993 { 1994 int rc; 1995 1996 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 1997 ctrlr->opts.admin_timeout_ms); 1998 1999 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 2000 &ctrlr->cdata, sizeof(ctrlr->cdata), 2001 nvme_ctrlr_identify_done, ctrlr); 2002 if (rc != 0) { 2003 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2004 return rc; 2005 } 2006 2007 return 0; 2008 } 2009 2010 static void 2011 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2012 { 2013 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2014 struct spdk_nvme_ctrlr *ctrlr = arg; 2015 2016 if (spdk_nvme_cpl_is_error(cpl)) { 2017 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2018 spdk_free(ctrlr->tmp_ptr); 2019 ctrlr->tmp_ptr = NULL; 2020 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2021 return; 2022 } 2023 2024 log_page = ctrlr->tmp_ptr; 2025 2026 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2027 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2028 } 2029 spdk_free(ctrlr->tmp_ptr); 2030 ctrlr->tmp_ptr = NULL; 2031 2032 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2033 } 2034 2035 static int 2036 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2037 { 2038 int rc; 2039 2040 assert(!ctrlr->tmp_ptr); 2041 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2042 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2043 if (!ctrlr->tmp_ptr) { 2044 rc = -ENOMEM; 2045 goto error; 2046 } 2047 2048 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2049 ctrlr->opts.admin_timeout_ms); 2050 2051 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2052 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2053 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2054 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2055 if (rc != 0) { 2056 goto error; 2057 } 2058 2059 return 0; 2060 2061 error: 2062 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2063 spdk_free(ctrlr->tmp_ptr); 2064 ctrlr->tmp_ptr = NULL; 2065 return rc; 2066 } 2067 2068 static void 2069 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl) 2070 { 2071 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2072 2073 if (spdk_nvme_cpl_is_error(cpl)) { 2074 /* no need to print an error, the controller simply does not support ZNS */ 2075 nvme_ctrlr_free_zns_specific_data(ctrlr); 2076 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2077 ctrlr->opts.admin_timeout_ms); 2078 return; 2079 } 2080 2081 /* A zero zasl value means use mdts */ 2082 if (ctrlr->cdata_zns->zasl) { 2083 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl); 2084 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append); 2085 } else { 2086 ctrlr->max_zone_append_size = ctrlr->max_xfer_size; 2087 } 2088 2089 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG, 2090 ctrlr->opts.admin_timeout_ms); 2091 } 2092 2093 /** 2094 * This function will try to fetch the I/O Command Specific Controller data structure for 2095 * each I/O Command Set supported by SPDK. 2096 * 2097 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command" 2098 * will be returned. Since we are fetching in a exploratively way, getting an error back 2099 * from the controller should not be treated as fatal. 2100 * 2101 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set). 2102 * 2103 * I/O Command Sets without a IOCS specific data structure (i.e. a zero-filled IOCS specific 2104 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set). 2105 */ 2106 static int 2107 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2108 { 2109 int rc; 2110 2111 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2112 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2113 ctrlr->opts.admin_timeout_ms); 2114 return 0; 2115 } 2116 2117 /* 2118 * Since SPDK currently only needs to fetch a single Command Set, keep the code here, 2119 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates, 2120 * which would require additional functions and complexity for no good reason. 2121 */ 2122 assert(!ctrlr->cdata_zns); 2123 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2124 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2125 if (!ctrlr->cdata_zns) { 2126 rc = -ENOMEM; 2127 goto error; 2128 } 2129 2130 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2131 ctrlr->opts.admin_timeout_ms); 2132 2133 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2134 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2135 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2136 if (rc != 0) { 2137 goto error; 2138 } 2139 2140 return 0; 2141 2142 error: 2143 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2144 nvme_ctrlr_free_zns_specific_data(ctrlr); 2145 return rc; 2146 } 2147 2148 enum nvme_active_ns_state { 2149 NVME_ACTIVE_NS_STATE_IDLE, 2150 NVME_ACTIVE_NS_STATE_PROCESSING, 2151 NVME_ACTIVE_NS_STATE_DONE, 2152 NVME_ACTIVE_NS_STATE_ERROR 2153 }; 2154 2155 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2156 2157 struct nvme_active_ns_ctx { 2158 struct spdk_nvme_ctrlr *ctrlr; 2159 uint32_t page_count; 2160 uint32_t next_nsid; 2161 uint32_t *new_ns_list; 2162 nvme_active_ns_ctx_deleter deleter; 2163 2164 enum nvme_active_ns_state state; 2165 }; 2166 2167 static struct nvme_active_ns_ctx * 2168 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2169 { 2170 struct nvme_active_ns_ctx *ctx; 2171 uint32_t *new_ns_list = NULL; 2172 2173 ctx = calloc(1, sizeof(*ctx)); 2174 if (!ctx) { 2175 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2176 return NULL; 2177 } 2178 2179 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2180 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2181 if (!new_ns_list) { 2182 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2183 free(ctx); 2184 return NULL; 2185 } 2186 2187 ctx->page_count = 1; 2188 ctx->new_ns_list = new_ns_list; 2189 ctx->ctrlr = ctrlr; 2190 ctx->deleter = deleter; 2191 2192 return ctx; 2193 } 2194 2195 static void 2196 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2197 { 2198 spdk_free(ctx->new_ns_list); 2199 free(ctx); 2200 } 2201 2202 static int 2203 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2204 { 2205 struct spdk_nvme_ns tmp, *ns; 2206 2207 assert(ctrlr != NULL); 2208 2209 tmp.id = nsid; 2210 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2211 if (ns == NULL) { 2212 return -EINVAL; 2213 } 2214 2215 nvme_ns_destruct(ns); 2216 ns->active = false; 2217 2218 return 0; 2219 } 2220 2221 static int 2222 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2223 { 2224 struct spdk_nvme_ns *ns; 2225 2226 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2227 return -EINVAL; 2228 } 2229 2230 /* Namespaces are constructed on demand, so simply request it. */ 2231 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2232 if (ns == NULL) { 2233 return -ENOMEM; 2234 } 2235 2236 ns->active = true; 2237 2238 return 0; 2239 } 2240 2241 static void 2242 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2243 size_t max_entries) 2244 { 2245 uint32_t active_ns_count = 0; 2246 size_t i; 2247 uint32_t nsid; 2248 struct spdk_nvme_ns *ns, *tmp_ns; 2249 int rc; 2250 2251 /* First, remove namespaces that no longer exist */ 2252 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2253 nsid = new_ns_list[0]; 2254 active_ns_count = 0; 2255 while (nsid != 0) { 2256 if (nsid == ns->id) { 2257 break; 2258 } 2259 2260 nsid = new_ns_list[active_ns_count++]; 2261 } 2262 2263 if (nsid != ns->id) { 2264 /* Did not find this namespace id in the new list. */ 2265 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2266 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2267 } 2268 } 2269 2270 /* Next, add new namespaces */ 2271 active_ns_count = 0; 2272 for (i = 0; i < max_entries; i++) { 2273 nsid = new_ns_list[active_ns_count]; 2274 2275 if (nsid == 0) { 2276 break; 2277 } 2278 2279 /* If the namespace already exists, this will not construct it a second time. */ 2280 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2281 if (rc != 0) { 2282 /* We can't easily handle a failure here. But just move on. */ 2283 assert(false); 2284 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2285 continue; 2286 } 2287 2288 active_ns_count++; 2289 } 2290 2291 ctrlr->active_ns_count = active_ns_count; 2292 } 2293 2294 static void 2295 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2296 { 2297 struct nvme_active_ns_ctx *ctx = arg; 2298 uint32_t *new_ns_list = NULL; 2299 2300 if (spdk_nvme_cpl_is_error(cpl)) { 2301 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2302 goto out; 2303 } 2304 2305 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2306 if (ctx->next_nsid == 0) { 2307 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2308 goto out; 2309 } 2310 2311 ctx->page_count++; 2312 new_ns_list = spdk_realloc(ctx->new_ns_list, 2313 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2314 ctx->ctrlr->page_size); 2315 if (!new_ns_list) { 2316 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2317 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2318 goto out; 2319 } 2320 2321 ctx->new_ns_list = new_ns_list; 2322 nvme_ctrlr_identify_active_ns_async(ctx); 2323 return; 2324 2325 out: 2326 if (ctx->deleter) { 2327 ctx->deleter(ctx); 2328 } 2329 } 2330 2331 static void 2332 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2333 { 2334 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2335 uint32_t i; 2336 int rc; 2337 2338 if (ctrlr->cdata.nn == 0) { 2339 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2340 goto out; 2341 } 2342 2343 assert(ctx->new_ns_list != NULL); 2344 2345 /* 2346 * If controller doesn't support active ns list CNS 0x02 dummy up 2347 * an active ns list, i.e. all namespaces report as active 2348 */ 2349 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2350 uint32_t *new_ns_list; 2351 2352 /* 2353 * Active NS list must always end with zero element. 2354 * So, we allocate for cdata.nn+1. 2355 */ 2356 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2357 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2358 new_ns_list = spdk_realloc(ctx->new_ns_list, 2359 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2360 ctx->ctrlr->page_size); 2361 if (!new_ns_list) { 2362 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2363 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2364 goto out; 2365 } 2366 2367 ctx->new_ns_list = new_ns_list; 2368 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2369 for (i = 0; i < ctrlr->cdata.nn; i++) { 2370 ctx->new_ns_list[i] = i + 1; 2371 } 2372 2373 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2374 goto out; 2375 } 2376 2377 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2378 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2379 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2380 nvme_ctrlr_identify_active_ns_async_done, ctx); 2381 if (rc != 0) { 2382 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2383 goto out; 2384 } 2385 2386 return; 2387 2388 out: 2389 if (ctx->deleter) { 2390 ctx->deleter(ctx); 2391 } 2392 } 2393 2394 static void 2395 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2396 { 2397 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2398 struct spdk_nvme_ns *ns; 2399 2400 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2401 nvme_active_ns_ctx_destroy(ctx); 2402 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2403 return; 2404 } 2405 2406 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2407 2408 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2409 nvme_ns_free_iocs_specific_data(ns); 2410 } 2411 2412 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2413 nvme_active_ns_ctx_destroy(ctx); 2414 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2415 } 2416 2417 static void 2418 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2419 { 2420 struct nvme_active_ns_ctx *ctx; 2421 2422 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2423 if (!ctx) { 2424 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2425 return; 2426 } 2427 2428 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2429 ctrlr->opts.admin_timeout_ms); 2430 nvme_ctrlr_identify_active_ns_async(ctx); 2431 } 2432 2433 int 2434 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2435 { 2436 struct nvme_active_ns_ctx *ctx; 2437 int rc; 2438 2439 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2440 if (!ctx) { 2441 return -ENOMEM; 2442 } 2443 2444 nvme_ctrlr_identify_active_ns_async(ctx); 2445 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2446 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2447 if (rc < 0) { 2448 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2449 break; 2450 } 2451 } 2452 2453 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2454 nvme_active_ns_ctx_destroy(ctx); 2455 return -ENXIO; 2456 } 2457 2458 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2459 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2460 nvme_active_ns_ctx_destroy(ctx); 2461 2462 return 0; 2463 } 2464 2465 static void 2466 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2467 { 2468 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2469 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2470 uint32_t nsid; 2471 int rc; 2472 2473 if (spdk_nvme_cpl_is_error(cpl)) { 2474 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2475 return; 2476 } 2477 2478 nvme_ns_set_identify_data(ns); 2479 2480 /* move on to the next active NS */ 2481 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2482 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2483 if (ns == NULL) { 2484 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2485 ctrlr->opts.admin_timeout_ms); 2486 return; 2487 } 2488 ns->ctrlr = ctrlr; 2489 ns->id = nsid; 2490 2491 rc = nvme_ctrlr_identify_ns_async(ns); 2492 if (rc) { 2493 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2494 } 2495 } 2496 2497 static int 2498 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2499 { 2500 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2501 struct spdk_nvme_ns_data *nsdata; 2502 2503 nsdata = &ns->nsdata; 2504 2505 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2506 ctrlr->opts.admin_timeout_ms); 2507 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2508 nsdata, sizeof(*nsdata), 2509 nvme_ctrlr_identify_ns_async_done, ns); 2510 } 2511 2512 static int 2513 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2514 { 2515 uint32_t nsid; 2516 struct spdk_nvme_ns *ns; 2517 int rc; 2518 2519 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2520 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2521 if (ns == NULL) { 2522 /* No active NS, move on to the next state */ 2523 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2524 ctrlr->opts.admin_timeout_ms); 2525 return 0; 2526 } 2527 2528 ns->ctrlr = ctrlr; 2529 ns->id = nsid; 2530 2531 rc = nvme_ctrlr_identify_ns_async(ns); 2532 if (rc) { 2533 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2534 } 2535 2536 return rc; 2537 } 2538 2539 static int 2540 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2541 { 2542 uint32_t nsid; 2543 struct spdk_nvme_ns *ns; 2544 int rc; 2545 2546 if (!prev_nsid) { 2547 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2548 } else { 2549 /* move on to the next active NS */ 2550 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2551 } 2552 2553 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2554 if (ns == NULL) { 2555 /* No first/next active NS, move on to the next state */ 2556 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2557 ctrlr->opts.admin_timeout_ms); 2558 return 0; 2559 } 2560 2561 /* loop until we find a ns which has (supported) iocs specific data */ 2562 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2563 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2564 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2565 if (ns == NULL) { 2566 /* no namespace with (supported) iocs specific data found */ 2567 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2568 ctrlr->opts.admin_timeout_ms); 2569 return 0; 2570 } 2571 } 2572 2573 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2574 if (rc) { 2575 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2576 } 2577 2578 return rc; 2579 } 2580 2581 static void 2582 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2583 { 2584 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2585 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2586 2587 if (spdk_nvme_cpl_is_error(cpl)) { 2588 nvme_ns_free_zns_specific_data(ns); 2589 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2590 return; 2591 } 2592 2593 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2594 } 2595 2596 static int 2597 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2598 { 2599 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2600 int rc; 2601 2602 switch (ns->csi) { 2603 case SPDK_NVME_CSI_ZNS: 2604 break; 2605 default: 2606 /* 2607 * This switch must handle all cases for which 2608 * nvme_ns_has_supported_iocs_specific_data() returns true, 2609 * other cases should never happen. 2610 */ 2611 assert(0); 2612 } 2613 2614 assert(!ns->nsdata_zns); 2615 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2616 SPDK_MALLOC_SHARE); 2617 if (!ns->nsdata_zns) { 2618 return -ENOMEM; 2619 } 2620 2621 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2622 ctrlr->opts.admin_timeout_ms); 2623 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2624 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2625 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2626 if (rc) { 2627 nvme_ns_free_zns_specific_data(ns); 2628 } 2629 2630 return rc; 2631 } 2632 2633 static int 2634 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2635 { 2636 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2637 /* Multi IOCS not supported/enabled, move on to the next state */ 2638 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2639 ctrlr->opts.admin_timeout_ms); 2640 return 0; 2641 } 2642 2643 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2644 } 2645 2646 static void 2647 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2648 { 2649 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2650 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2651 uint32_t nsid; 2652 int rc; 2653 2654 if (spdk_nvme_cpl_is_error(cpl)) { 2655 /* 2656 * Many controllers claim to be compatible with NVMe 1.3, however, 2657 * they do not implement NS ID Desc List. Therefore, instead of setting 2658 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2659 * error and move on to the next state. 2660 * 2661 * The proper way is to create a new quirk for controllers that violate 2662 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2663 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2664 * it is too generic and was added in order to handle controllers that 2665 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 2666 */ 2667 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2668 ctrlr->opts.admin_timeout_ms); 2669 return; 2670 } 2671 2672 nvme_ns_set_id_desc_list_data(ns); 2673 2674 /* move on to the next active NS */ 2675 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2676 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2677 if (ns == NULL) { 2678 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2679 ctrlr->opts.admin_timeout_ms); 2680 return; 2681 } 2682 2683 rc = nvme_ctrlr_identify_id_desc_async(ns); 2684 if (rc) { 2685 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2686 } 2687 } 2688 2689 static int 2690 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2691 { 2692 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2693 2694 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2695 2696 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2697 ctrlr->opts.admin_timeout_ms); 2698 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2699 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2700 nvme_ctrlr_identify_id_desc_async_done, ns); 2701 } 2702 2703 static int 2704 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2705 { 2706 uint32_t nsid; 2707 struct spdk_nvme_ns *ns; 2708 int rc; 2709 2710 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2711 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2712 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2713 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2714 /* NS ID Desc List not supported, move on to the next state */ 2715 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2716 ctrlr->opts.admin_timeout_ms); 2717 return 0; 2718 } 2719 2720 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2721 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2722 if (ns == NULL) { 2723 /* No active NS, move on to the next state */ 2724 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2725 ctrlr->opts.admin_timeout_ms); 2726 return 0; 2727 } 2728 2729 rc = nvme_ctrlr_identify_id_desc_async(ns); 2730 if (rc) { 2731 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2732 } 2733 2734 return rc; 2735 } 2736 2737 static void 2738 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2739 { 2740 if (spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 2741 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2742 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2743 ctrlr->cdata.nvmf_specific.ioccsz); 2744 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2745 assert(0); 2746 } 2747 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2748 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2749 } 2750 } 2751 2752 static void 2753 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2754 { 2755 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2756 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2757 2758 if (spdk_nvme_cpl_is_error(cpl)) { 2759 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2760 ctrlr->opts.num_io_queues = 0; 2761 } else { 2762 /* 2763 * Data in cdw0 is 0-based. 2764 * Lower 16-bits indicate number of submission queues allocated. 2765 * Upper 16-bits indicate number of completion queues allocated. 2766 */ 2767 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2768 cq_allocated = (cpl->cdw0 >> 16) + 1; 2769 2770 /* 2771 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2772 * submission and completion queues. 2773 */ 2774 min_allocated = spdk_min(sq_allocated, cq_allocated); 2775 2776 /* Set number of queues to be minimum of requested and actually allocated. */ 2777 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2778 } 2779 2780 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2781 if (ctrlr->free_io_qids == NULL) { 2782 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2783 return; 2784 } 2785 2786 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2787 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2788 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2789 } 2790 2791 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2792 ctrlr->opts.admin_timeout_ms); 2793 } 2794 2795 static int 2796 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2797 { 2798 int rc; 2799 2800 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2801 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2802 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2803 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2804 } else if (ctrlr->opts.num_io_queues < 1) { 2805 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2806 ctrlr->opts.num_io_queues = 1; 2807 } 2808 2809 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2810 ctrlr->opts.admin_timeout_ms); 2811 2812 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2813 nvme_ctrlr_set_num_queues_done, ctrlr); 2814 if (rc != 0) { 2815 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2816 return rc; 2817 } 2818 2819 return 0; 2820 } 2821 2822 static void 2823 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2824 { 2825 uint32_t keep_alive_interval_us; 2826 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2827 2828 if (spdk_nvme_cpl_is_error(cpl)) { 2829 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2830 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2831 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2832 } else { 2833 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2834 cpl->status.sc, cpl->status.sct); 2835 ctrlr->opts.keep_alive_timeout_ms = 0; 2836 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2837 return; 2838 } 2839 } else { 2840 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2841 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2842 cpl->cdw0); 2843 } 2844 2845 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2846 } 2847 2848 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2849 ctrlr->keep_alive_interval_ticks = 0; 2850 } else { 2851 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2852 2853 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2854 2855 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2856 UINT64_C(1000000); 2857 2858 /* Schedule the first Keep Alive to be sent as soon as possible. */ 2859 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2860 } 2861 2862 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2863 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2864 } else { 2865 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2866 ctrlr->opts.admin_timeout_ms); 2867 } 2868 } 2869 2870 static int 2871 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2872 { 2873 int rc; 2874 2875 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2876 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2877 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2878 } else { 2879 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2880 ctrlr->opts.admin_timeout_ms); 2881 } 2882 return 0; 2883 } 2884 2885 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2886 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2887 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2888 ctrlr->opts.keep_alive_timeout_ms = 0; 2889 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2890 ctrlr->opts.admin_timeout_ms); 2891 return 0; 2892 } 2893 2894 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2895 ctrlr->opts.admin_timeout_ms); 2896 2897 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2898 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2899 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2900 if (rc != 0) { 2901 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2902 ctrlr->opts.keep_alive_timeout_ms = 0; 2903 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2904 return rc; 2905 } 2906 2907 return 0; 2908 } 2909 2910 static void 2911 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2912 { 2913 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2914 2915 if (spdk_nvme_cpl_is_error(cpl)) { 2916 /* 2917 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2918 * is optional. 2919 */ 2920 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2921 cpl->status.sc, cpl->status.sct); 2922 } else { 2923 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2924 } 2925 2926 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2927 } 2928 2929 static int 2930 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2931 { 2932 uint8_t *host_id; 2933 uint32_t host_id_size; 2934 int rc; 2935 2936 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2937 /* 2938 * NVMe-oF sends the host ID during Connect and doesn't allow 2939 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 2940 */ 2941 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2942 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2943 return 0; 2944 } 2945 2946 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2947 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2948 host_id = ctrlr->opts.extended_host_id; 2949 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2950 } else { 2951 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2952 host_id = ctrlr->opts.host_id; 2953 host_id_size = sizeof(ctrlr->opts.host_id); 2954 } 2955 2956 /* If the user specified an all-zeroes host identifier, don't send the command. */ 2957 if (spdk_mem_all_zero(host_id, host_id_size)) { 2958 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2959 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2960 return 0; 2961 } 2962 2963 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2964 2965 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2966 ctrlr->opts.admin_timeout_ms); 2967 2968 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2969 if (rc != 0) { 2970 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2971 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2972 return rc; 2973 } 2974 2975 return 0; 2976 } 2977 2978 void 2979 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2980 { 2981 uint32_t nsid; 2982 struct spdk_nvme_ns *ns; 2983 2984 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2985 nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 2986 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2987 nvme_ns_destruct(ns); 2988 nvme_ns_construct(ns, nsid, ctrlr); 2989 } 2990 } 2991 2992 static int 2993 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 2994 { 2995 struct nvme_completion_poll_status *status; 2996 int rc = -ENOMEM; 2997 char *buffer = NULL; 2998 uint32_t nsid; 2999 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 3000 3001 if (ctrlr->opts.disable_read_changed_ns_list_log_page) { 3002 return 0; 3003 } 3004 3005 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 3006 if (!buffer) { 3007 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 3008 "changed ns log.\n"); 3009 return rc; 3010 } 3011 3012 status = calloc(1, sizeof(*status)); 3013 if (!status) { 3014 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3015 goto free_buffer; 3016 } 3017 3018 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3019 SPDK_NVME_LOG_CHANGED_NS_LIST, 3020 SPDK_NVME_GLOBAL_NS_TAG, 3021 buffer, buf_size, 0, 3022 nvme_completion_poll_cb, status); 3023 3024 if (rc) { 3025 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3026 free(status); 3027 goto free_buffer; 3028 } 3029 3030 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3031 ctrlr->opts.admin_timeout_ms * 1000); 3032 if (!status->timed_out) { 3033 free(status); 3034 } 3035 3036 if (rc) { 3037 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3038 goto free_buffer; 3039 } 3040 3041 /* only check the case of overflow. */ 3042 nsid = from_le32(buffer); 3043 if (nsid == 0xffffffffu) { 3044 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3045 } 3046 3047 free_buffer: 3048 spdk_dma_free(buffer); 3049 return rc; 3050 } 3051 3052 void 3053 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3054 const struct spdk_nvme_cpl *cpl) 3055 { 3056 union spdk_nvme_async_event_completion event; 3057 struct spdk_nvme_ctrlr_process *active_proc; 3058 int rc; 3059 3060 event.raw = cpl->cdw0; 3061 3062 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3063 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3064 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3065 3066 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3067 if (rc) { 3068 return; 3069 } 3070 nvme_ctrlr_update_namespaces(ctrlr); 3071 nvme_io_msg_ctrlr_update(ctrlr); 3072 } 3073 3074 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3075 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3076 if (!ctrlr->opts.disable_read_ana_log_page) { 3077 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3078 if (rc) { 3079 return; 3080 } 3081 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3082 ctrlr); 3083 } 3084 } 3085 3086 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3087 if (active_proc && active_proc->aer_cb_fn) { 3088 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3089 } 3090 } 3091 3092 static void 3093 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3094 const struct spdk_nvme_cpl *cpl) 3095 { 3096 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3097 struct spdk_nvme_ctrlr_process *proc; 3098 3099 /* Add async event to each process objects event list */ 3100 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3101 /* Must be shared memory so other processes can access */ 3102 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3103 if (!nvme_event) { 3104 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3105 return; 3106 } 3107 nvme_event->cpl = *cpl; 3108 3109 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3110 } 3111 } 3112 3113 void 3114 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3115 { 3116 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3117 struct spdk_nvme_ctrlr_process *active_proc; 3118 3119 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3120 3121 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3122 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3123 spdk_nvme_ctrlr_aer_completion_list, link); 3124 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3125 spdk_free(nvme_event); 3126 3127 } 3128 } 3129 3130 static void 3131 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3132 { 3133 struct nvme_async_event_request *aer = arg; 3134 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3135 3136 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3137 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3138 /* 3139 * This is simulated when controller is being shut down, to 3140 * effectively abort outstanding asynchronous event requests 3141 * and make sure all memory is freed. Do not repost the 3142 * request in this case. 3143 */ 3144 return; 3145 } 3146 3147 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3148 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3149 /* 3150 * SPDK will only send as many AERs as the device says it supports, 3151 * so this status code indicates an out-of-spec device. Do not repost 3152 * the request in this case. 3153 */ 3154 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3155 "handling. Do not repost this AER.\n"); 3156 return; 3157 } 3158 3159 /* Add the events to the list */ 3160 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3161 3162 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3163 if (ctrlr->is_removed || ctrlr->is_destructed) { 3164 return; 3165 } 3166 3167 /* 3168 * Repost another asynchronous event request to replace the one 3169 * that just completed. 3170 */ 3171 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3172 /* 3173 * We can't do anything to recover from a failure here, 3174 * so just print a warning message and leave the AER unsubmitted. 3175 */ 3176 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3177 } 3178 } 3179 3180 static int 3181 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3182 struct nvme_async_event_request *aer) 3183 { 3184 struct nvme_request *req; 3185 3186 aer->ctrlr = ctrlr; 3187 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3188 aer->req = req; 3189 if (req == NULL) { 3190 return -1; 3191 } 3192 3193 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3194 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3195 } 3196 3197 static void 3198 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3199 { 3200 struct nvme_async_event_request *aer; 3201 int rc; 3202 uint32_t i; 3203 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3204 3205 if (spdk_nvme_cpl_is_error(cpl)) { 3206 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3207 ctrlr->num_aers = 0; 3208 } else { 3209 /* aerl is a zero-based value, so we need to add 1 here. */ 3210 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3211 } 3212 3213 for (i = 0; i < ctrlr->num_aers; i++) { 3214 aer = &ctrlr->aer[i]; 3215 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3216 if (rc) { 3217 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3218 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3219 return; 3220 } 3221 } 3222 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3223 } 3224 3225 static int 3226 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3227 { 3228 union spdk_nvme_feat_async_event_configuration config; 3229 int rc; 3230 3231 config.raw = 0; 3232 3233 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3234 config.bits.discovery_log_change_notice = 1; 3235 } else { 3236 config.bits.crit_warn.bits.available_spare = 1; 3237 config.bits.crit_warn.bits.temperature = 1; 3238 config.bits.crit_warn.bits.device_reliability = 1; 3239 config.bits.crit_warn.bits.read_only = 1; 3240 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3241 3242 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3243 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3244 config.bits.ns_attr_notice = 1; 3245 } 3246 if (ctrlr->cdata.oaes.fw_activation_notices) { 3247 config.bits.fw_activation_notice = 1; 3248 } 3249 if (ctrlr->cdata.oaes.ana_change_notices) { 3250 config.bits.ana_change_notice = 1; 3251 } 3252 } 3253 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3254 config.bits.telemetry_log_notice = 1; 3255 } 3256 } 3257 3258 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3259 ctrlr->opts.admin_timeout_ms); 3260 3261 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3262 nvme_ctrlr_configure_aer_done, 3263 ctrlr); 3264 if (rc != 0) { 3265 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3266 return rc; 3267 } 3268 3269 return 0; 3270 } 3271 3272 struct spdk_nvme_ctrlr_process * 3273 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3274 { 3275 struct spdk_nvme_ctrlr_process *active_proc; 3276 3277 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3278 if (active_proc->pid == pid) { 3279 return active_proc; 3280 } 3281 } 3282 3283 return NULL; 3284 } 3285 3286 struct spdk_nvme_ctrlr_process * 3287 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3288 { 3289 return nvme_ctrlr_get_process(ctrlr, getpid()); 3290 } 3291 3292 /** 3293 * This function will be called when a process is using the controller. 3294 * 1. For the primary process, it is called when constructing the controller. 3295 * 2. For the secondary process, it is called at probing the controller. 3296 * Note: will check whether the process is already added for the same process. 3297 */ 3298 int 3299 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3300 { 3301 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3302 pid_t pid = getpid(); 3303 3304 /* Check whether the process is already added or not */ 3305 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3306 return 0; 3307 } 3308 3309 /* Initialize the per process properties for this ctrlr */ 3310 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3311 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3312 if (ctrlr_proc == NULL) { 3313 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3314 3315 return -1; 3316 } 3317 3318 ctrlr_proc->is_primary = spdk_process_is_primary(); 3319 ctrlr_proc->pid = pid; 3320 STAILQ_INIT(&ctrlr_proc->active_reqs); 3321 ctrlr_proc->devhandle = devhandle; 3322 ctrlr_proc->ref = 0; 3323 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3324 STAILQ_INIT(&ctrlr_proc->async_events); 3325 3326 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3327 3328 return 0; 3329 } 3330 3331 /** 3332 * This function will be called when the process detaches the controller. 3333 * Note: the ctrlr_lock must be held when calling this function. 3334 */ 3335 static void 3336 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3337 struct spdk_nvme_ctrlr_process *proc) 3338 { 3339 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3340 3341 assert(STAILQ_EMPTY(&proc->active_reqs)); 3342 3343 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3344 spdk_nvme_ctrlr_free_io_qpair(qpair); 3345 } 3346 3347 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3348 3349 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3350 spdk_pci_device_detach(proc->devhandle); 3351 } 3352 3353 spdk_free(proc); 3354 } 3355 3356 /** 3357 * This function will be called when the process exited unexpectedly 3358 * in order to free any incomplete nvme request, allocated IO qpairs 3359 * and allocated memory. 3360 * Note: the ctrlr_lock must be held when calling this function. 3361 */ 3362 static void 3363 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3364 { 3365 struct nvme_request *req, *tmp_req; 3366 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3367 struct spdk_nvme_ctrlr_aer_completion_list *event; 3368 3369 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3370 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3371 3372 assert(req->pid == proc->pid); 3373 if (req->user_buffer && req->payload_size) { 3374 spdk_free(req->payload.contig_or_cb_arg); 3375 } 3376 nvme_free_request(req); 3377 } 3378 3379 /* Remove async event from each process objects event list */ 3380 while (!STAILQ_EMPTY(&proc->async_events)) { 3381 event = STAILQ_FIRST(&proc->async_events); 3382 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3383 spdk_free(event); 3384 } 3385 3386 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3387 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3388 3389 /* 3390 * The process may have been killed while some qpairs were in their 3391 * completion context. Clear that flag here to allow these IO 3392 * qpairs to be deleted. 3393 */ 3394 qpair->in_completion_context = 0; 3395 3396 qpair->no_deletion_notification_needed = 1; 3397 3398 spdk_nvme_ctrlr_free_io_qpair(qpair); 3399 } 3400 3401 spdk_free(proc); 3402 } 3403 3404 /** 3405 * This function will be called when destructing the controller. 3406 * 1. There is no more admin request on this controller. 3407 * 2. Clean up any left resource allocation when its associated process is gone. 3408 */ 3409 void 3410 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3411 { 3412 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3413 3414 /* Free all the processes' properties and make sure no pending admin IOs */ 3415 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3416 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3417 3418 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3419 3420 spdk_free(active_proc); 3421 } 3422 } 3423 3424 /** 3425 * This function will be called when any other process attaches or 3426 * detaches the controller in order to cleanup those unexpectedly 3427 * terminated processes. 3428 * Note: the ctrlr_lock must be held when calling this function. 3429 */ 3430 static int 3431 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3432 { 3433 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3434 int active_proc_count = 0; 3435 3436 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3437 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3438 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3439 3440 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3441 3442 nvme_ctrlr_cleanup_process(active_proc); 3443 } else { 3444 active_proc_count++; 3445 } 3446 } 3447 3448 return active_proc_count; 3449 } 3450 3451 void 3452 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3453 { 3454 struct spdk_nvme_ctrlr_process *active_proc; 3455 3456 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3457 3458 nvme_ctrlr_remove_inactive_proc(ctrlr); 3459 3460 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3461 if (active_proc) { 3462 active_proc->ref++; 3463 } 3464 3465 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3466 } 3467 3468 void 3469 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3470 { 3471 struct spdk_nvme_ctrlr_process *active_proc; 3472 int proc_count; 3473 3474 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3475 3476 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3477 3478 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3479 if (active_proc) { 3480 active_proc->ref--; 3481 assert(active_proc->ref >= 0); 3482 3483 /* 3484 * The last active process will be removed at the end of 3485 * the destruction of the controller. 3486 */ 3487 if (active_proc->ref == 0 && proc_count != 1) { 3488 nvme_ctrlr_remove_process(ctrlr, active_proc); 3489 } 3490 } 3491 3492 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3493 } 3494 3495 int 3496 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3497 { 3498 struct spdk_nvme_ctrlr_process *active_proc; 3499 int ref = 0; 3500 3501 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3502 3503 nvme_ctrlr_remove_inactive_proc(ctrlr); 3504 3505 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3506 ref += active_proc->ref; 3507 } 3508 3509 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3510 3511 return ref; 3512 } 3513 3514 /** 3515 * Get the PCI device handle which is only visible to its associated process. 3516 */ 3517 struct spdk_pci_device * 3518 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3519 { 3520 struct spdk_nvme_ctrlr_process *active_proc; 3521 struct spdk_pci_device *devhandle = NULL; 3522 3523 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3524 3525 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3526 if (active_proc) { 3527 devhandle = active_proc->devhandle; 3528 } 3529 3530 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3531 3532 return devhandle; 3533 } 3534 3535 static void 3536 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3537 { 3538 struct spdk_nvme_ctrlr *ctrlr = ctx; 3539 3540 if (spdk_nvme_cpl_is_error(cpl)) { 3541 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3542 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3543 return; 3544 } 3545 3546 assert(value <= UINT32_MAX); 3547 ctrlr->vs.raw = (uint32_t)value; 3548 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3549 } 3550 3551 static void 3552 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3553 { 3554 struct spdk_nvme_ctrlr *ctrlr = ctx; 3555 3556 if (spdk_nvme_cpl_is_error(cpl)) { 3557 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3558 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3559 return; 3560 } 3561 3562 ctrlr->cap.raw = value; 3563 nvme_ctrlr_init_cap(ctrlr); 3564 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3565 } 3566 3567 static void 3568 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3569 { 3570 struct spdk_nvme_ctrlr *ctrlr = ctx; 3571 enum nvme_ctrlr_state state; 3572 3573 if (spdk_nvme_cpl_is_error(cpl)) { 3574 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3575 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3576 return; 3577 } 3578 3579 assert(value <= UINT32_MAX); 3580 ctrlr->process_init_cc.raw = (uint32_t)value; 3581 3582 if (ctrlr->process_init_cc.bits.en) { 3583 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3584 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3585 } else { 3586 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3587 } 3588 3589 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3590 } 3591 3592 static void 3593 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3594 { 3595 struct spdk_nvme_ctrlr *ctrlr = ctx; 3596 3597 if (spdk_nvme_cpl_is_error(cpl)) { 3598 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3599 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3600 return; 3601 } 3602 3603 /* 3604 * Wait 2.5 seconds before accessing PCI registers. 3605 * Not using sleep() to avoid blocking other controller's initialization. 3606 */ 3607 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3608 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3609 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3610 } 3611 3612 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3613 nvme_ctrlr_get_ready_timeout(ctrlr)); 3614 } 3615 3616 static void 3617 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3618 { 3619 struct spdk_nvme_ctrlr *ctrlr = ctx; 3620 union spdk_nvme_cc_register cc; 3621 int rc; 3622 3623 if (spdk_nvme_cpl_is_error(cpl)) { 3624 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3625 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3626 return; 3627 } 3628 3629 assert(value <= UINT32_MAX); 3630 cc.raw = (uint32_t)value; 3631 cc.bits.en = 0; 3632 ctrlr->process_init_cc.raw = cc.raw; 3633 3634 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3635 nvme_ctrlr_get_ready_timeout(ctrlr)); 3636 3637 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3638 if (rc != 0) { 3639 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3640 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3641 } 3642 } 3643 3644 static void 3645 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3646 { 3647 struct spdk_nvme_ctrlr *ctrlr = ctx; 3648 union spdk_nvme_csts_register csts; 3649 3650 if (spdk_nvme_cpl_is_error(cpl)) { 3651 /* While a device is resetting, it may be unable to service MMIO reads 3652 * temporarily. Allow for this case. 3653 */ 3654 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3655 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3656 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3657 NVME_TIMEOUT_KEEP_EXISTING); 3658 } else { 3659 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3660 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3661 } 3662 3663 return; 3664 } 3665 3666 assert(value <= UINT32_MAX); 3667 csts.raw = (uint32_t)value; 3668 if (csts.bits.rdy == 1 || csts.bits.cfs == 1) { 3669 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3670 nvme_ctrlr_get_ready_timeout(ctrlr)); 3671 } else { 3672 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3673 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3674 NVME_TIMEOUT_KEEP_EXISTING); 3675 } 3676 } 3677 3678 static void 3679 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3680 { 3681 struct spdk_nvme_ctrlr *ctrlr = ctx; 3682 union spdk_nvme_csts_register csts; 3683 3684 if (spdk_nvme_cpl_is_error(cpl)) { 3685 /* While a device is resetting, it may be unable to service MMIO reads 3686 * temporarily. Allow for this case. 3687 */ 3688 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3689 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3690 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3691 NVME_TIMEOUT_KEEP_EXISTING); 3692 } else { 3693 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3694 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3695 } 3696 3697 return; 3698 } 3699 3700 assert(value <= UINT32_MAX); 3701 csts.raw = (uint32_t)value; 3702 if (csts.bits.rdy == 0) { 3703 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3704 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED, 3705 nvme_ctrlr_get_ready_timeout(ctrlr)); 3706 } else { 3707 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3708 NVME_TIMEOUT_KEEP_EXISTING); 3709 } 3710 } 3711 3712 static void 3713 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3714 const struct spdk_nvme_cpl *cpl) 3715 { 3716 struct spdk_nvme_ctrlr *ctrlr = ctx; 3717 union spdk_nvme_csts_register csts; 3718 3719 if (spdk_nvme_cpl_is_error(cpl)) { 3720 /* While a device is resetting, it may be unable to service MMIO reads 3721 * temporarily. Allow for this case. 3722 */ 3723 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3724 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3725 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3726 NVME_TIMEOUT_KEEP_EXISTING); 3727 } else { 3728 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3729 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3730 } 3731 3732 return; 3733 } 3734 3735 assert(value <= UINT32_MAX); 3736 csts.raw = value; 3737 if (csts.bits.rdy == 1) { 3738 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3739 /* 3740 * The controller has been enabled. 3741 * Perform the rest of initialization serially. 3742 */ 3743 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3744 ctrlr->opts.admin_timeout_ms); 3745 } else { 3746 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3747 NVME_TIMEOUT_KEEP_EXISTING); 3748 } 3749 } 3750 3751 /** 3752 * This function will be called repeatedly during initialization until the controller is ready. 3753 */ 3754 int 3755 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3756 { 3757 uint32_t ready_timeout_in_ms; 3758 uint64_t ticks; 3759 int rc = 0; 3760 3761 ticks = spdk_get_ticks(); 3762 3763 /* 3764 * May need to avoid accessing any register on the target controller 3765 * for a while. Return early without touching the FSM. 3766 * Check sleep_timeout_tsc > 0 for unit test. 3767 */ 3768 if ((ctrlr->sleep_timeout_tsc > 0) && 3769 (ticks <= ctrlr->sleep_timeout_tsc)) { 3770 return 0; 3771 } 3772 ctrlr->sleep_timeout_tsc = 0; 3773 3774 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3775 3776 /* 3777 * Check if the current initialization step is done or has timed out. 3778 */ 3779 switch (ctrlr->state) { 3780 case NVME_CTRLR_STATE_INIT_DELAY: 3781 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3782 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3783 /* 3784 * Controller may need some delay before it's enabled. 3785 * 3786 * This is a workaround for an issue where the PCIe-attached NVMe controller 3787 * is not ready after VFIO reset. We delay the initialization rather than the 3788 * enabling itself, because this is required only for the very first enabling 3789 * - directly after a VFIO reset. 3790 */ 3791 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3792 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3793 } 3794 break; 3795 3796 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3797 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3798 if (rc == 0) { 3799 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3800 NVME_TIMEOUT_INFINITE); 3801 } else { 3802 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3803 } 3804 break; 3805 3806 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3807 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3808 3809 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3810 case NVME_QPAIR_CONNECTING: 3811 break; 3812 case NVME_QPAIR_CONNECTED: 3813 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3814 /* Fall through */ 3815 case NVME_QPAIR_ENABLED: 3816 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3817 NVME_TIMEOUT_INFINITE); 3818 /* Abort any queued requests that were sent while the adminq was connecting 3819 * to avoid stalling the init process during a reset, as requests don't get 3820 * resubmitted while the controller is resetting and subsequent commands 3821 * would get queued too. 3822 */ 3823 nvme_qpair_abort_queued_reqs(ctrlr->adminq); 3824 break; 3825 case NVME_QPAIR_DISCONNECTING: 3826 assert(ctrlr->adminq->async == true); 3827 break; 3828 case NVME_QPAIR_DISCONNECTED: 3829 /* fallthrough */ 3830 default: 3831 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3832 break; 3833 } 3834 3835 break; 3836 3837 case NVME_CTRLR_STATE_READ_VS: 3838 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3839 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3840 break; 3841 3842 case NVME_CTRLR_STATE_READ_CAP: 3843 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3844 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3845 break; 3846 3847 case NVME_CTRLR_STATE_CHECK_EN: 3848 /* Begin the hardware initialization by making sure the controller is disabled. */ 3849 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3850 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3851 break; 3852 3853 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3854 /* 3855 * Controller is currently enabled. We need to disable it to cause a reset. 3856 * 3857 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3858 * Wait for the ready bit to be 1 before disabling the controller. 3859 */ 3860 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3861 NVME_TIMEOUT_KEEP_EXISTING); 3862 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3863 break; 3864 3865 case NVME_CTRLR_STATE_SET_EN_0: 3866 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3867 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3868 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3869 break; 3870 3871 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3872 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3873 NVME_TIMEOUT_KEEP_EXISTING); 3874 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3875 break; 3876 3877 case NVME_CTRLR_STATE_DISABLED: 3878 if (ctrlr->is_disconnecting) { 3879 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n"); 3880 } else { 3881 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3882 3883 /* 3884 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3885 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3886 */ 3887 spdk_delay_us(100); 3888 } 3889 break; 3890 3891 case NVME_CTRLR_STATE_ENABLE: 3892 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3893 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3894 rc = nvme_ctrlr_enable(ctrlr); 3895 if (rc) { 3896 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d", rc); 3897 } 3898 return rc; 3899 3900 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3901 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3902 NVME_TIMEOUT_KEEP_EXISTING); 3903 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3904 ctrlr); 3905 break; 3906 3907 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3908 nvme_transport_qpair_reset(ctrlr->adminq); 3909 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3910 break; 3911 3912 case NVME_CTRLR_STATE_IDENTIFY: 3913 rc = nvme_ctrlr_identify(ctrlr); 3914 break; 3915 3916 case NVME_CTRLR_STATE_CONFIGURE_AER: 3917 rc = nvme_ctrlr_configure_aer(ctrlr); 3918 break; 3919 3920 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3921 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3922 break; 3923 3924 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3925 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3926 break; 3927 3928 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3929 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3930 break; 3931 3932 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3933 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3934 rc = nvme_ctrlr_set_num_queues(ctrlr); 3935 break; 3936 3937 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3938 _nvme_ctrlr_identify_active_ns(ctrlr); 3939 break; 3940 3941 case NVME_CTRLR_STATE_IDENTIFY_NS: 3942 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3943 break; 3944 3945 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3946 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3947 break; 3948 3949 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3950 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3951 break; 3952 3953 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3954 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3955 break; 3956 3957 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3958 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3959 break; 3960 3961 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3962 nvme_ctrlr_set_supported_features(ctrlr); 3963 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3964 ctrlr->opts.admin_timeout_ms); 3965 break; 3966 3967 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3968 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3969 break; 3970 3971 case NVME_CTRLR_STATE_SET_HOST_ID: 3972 rc = nvme_ctrlr_set_host_id(ctrlr); 3973 break; 3974 3975 case NVME_CTRLR_STATE_TRANSPORT_READY: 3976 rc = nvme_transport_ctrlr_ready(ctrlr); 3977 if (rc) { 3978 NVME_CTRLR_ERRLOG(ctrlr, "Transport controller ready step failed: rc %d\n", rc); 3979 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3980 } else { 3981 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 3982 } 3983 break; 3984 3985 case NVME_CTRLR_STATE_READY: 3986 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3987 return 0; 3988 3989 case NVME_CTRLR_STATE_ERROR: 3990 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3991 return -1; 3992 3993 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 3994 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 3995 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 3996 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 3997 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3998 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 3999 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 4000 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 4001 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 4002 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 4003 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 4004 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 4005 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 4006 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 4007 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 4008 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 4009 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 4010 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 4011 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 4012 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 4013 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 4014 /* 4015 * nvme_ctrlr_process_init() may be called from the completion context 4016 * for the admin qpair. Avoid recursive calls for this case. 4017 */ 4018 if (!ctrlr->adminq->in_completion_context) { 4019 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4020 } 4021 break; 4022 4023 default: 4024 assert(0); 4025 return -1; 4026 } 4027 4028 if (rc) { 4029 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d (%s)\n", 4030 rc, ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4031 } 4032 4033 /* Note: we use the ticks captured when we entered this function. 4034 * This covers environments where the SPDK process gets swapped out after 4035 * we tried to advance the state but before we check the timeout here. 4036 * It is not normal for this to happen, but harmless to handle it in this 4037 * way. 4038 */ 4039 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4040 ticks > ctrlr->state_timeout_tsc) { 4041 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4042 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4043 return -1; 4044 } 4045 4046 return rc; 4047 } 4048 4049 int 4050 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4051 { 4052 pthread_mutexattr_t attr; 4053 int rc = 0; 4054 4055 if (pthread_mutexattr_init(&attr)) { 4056 return -1; 4057 } 4058 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4059 #ifndef __FreeBSD__ 4060 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4061 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4062 #endif 4063 pthread_mutex_init(mtx, &attr)) { 4064 rc = -1; 4065 } 4066 pthread_mutexattr_destroy(&attr); 4067 return rc; 4068 } 4069 4070 int 4071 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4072 { 4073 int rc; 4074 4075 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4076 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4077 } else { 4078 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4079 } 4080 4081 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4082 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4083 ctrlr->opts.admin_queue_size); 4084 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4085 } 4086 4087 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_ADMIN_QUEUE_SIZE && 4088 (ctrlr->opts.admin_queue_size % SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE) != 0) { 4089 NVME_CTRLR_ERRLOG(ctrlr, 4090 "admin_queue_size %u is invalid for this NVMe device, adjust to next multiple\n", 4091 ctrlr->opts.admin_queue_size); 4092 ctrlr->opts.admin_queue_size = SPDK_ALIGN_CEIL(ctrlr->opts.admin_queue_size, 4093 SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE); 4094 } 4095 4096 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4097 NVME_CTRLR_ERRLOG(ctrlr, 4098 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4099 ctrlr->opts.admin_queue_size); 4100 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4101 } 4102 4103 ctrlr->flags = 0; 4104 ctrlr->free_io_qids = NULL; 4105 ctrlr->is_resetting = false; 4106 ctrlr->is_failed = false; 4107 ctrlr->is_destructed = false; 4108 4109 TAILQ_INIT(&ctrlr->active_io_qpairs); 4110 STAILQ_INIT(&ctrlr->queued_aborts); 4111 ctrlr->outstanding_aborts = 0; 4112 4113 ctrlr->ana_log_page = NULL; 4114 ctrlr->ana_log_page_size = 0; 4115 4116 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4117 if (rc != 0) { 4118 return rc; 4119 } 4120 4121 TAILQ_INIT(&ctrlr->active_procs); 4122 STAILQ_INIT(&ctrlr->register_operations); 4123 4124 RB_INIT(&ctrlr->ns); 4125 4126 return rc; 4127 } 4128 4129 static void 4130 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4131 { 4132 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4133 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4134 } 4135 4136 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4137 4138 /* For now, always select page_size == min_page_size. */ 4139 ctrlr->page_size = ctrlr->min_page_size; 4140 4141 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4142 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4143 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4144 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4145 /* If the user specifically set an IO queue size different than the 4146 * default, use that value. Otherwise overwrite with the quirked value. 4147 * This allows this quirk to be overridden when necessary. 4148 * However, cap.mqes still needs to be respected. 4149 */ 4150 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4151 } 4152 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4153 4154 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4155 } 4156 4157 void 4158 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4159 { 4160 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4161 } 4162 4163 void 4164 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4165 struct nvme_ctrlr_detach_ctx *ctx) 4166 { 4167 struct spdk_nvme_qpair *qpair, *tmp; 4168 4169 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4170 4171 ctrlr->prepare_for_reset = false; 4172 ctrlr->is_destructed = true; 4173 4174 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4175 4176 nvme_ctrlr_abort_queued_aborts(ctrlr); 4177 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4178 4179 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4180 spdk_nvme_ctrlr_free_io_qpair(qpair); 4181 } 4182 4183 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4184 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4185 4186 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4187 } 4188 4189 int 4190 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4191 struct nvme_ctrlr_detach_ctx *ctx) 4192 { 4193 struct spdk_nvme_ns *ns, *tmp_ns; 4194 int rc = 0; 4195 4196 if (!ctx->shutdown_complete) { 4197 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4198 if (rc == -EAGAIN) { 4199 return -EAGAIN; 4200 } 4201 /* Destruct ctrlr forcefully for any other error. */ 4202 } 4203 4204 if (ctx->cb_fn) { 4205 ctx->cb_fn(ctrlr); 4206 } 4207 4208 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4209 4210 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4211 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4212 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4213 spdk_free(ns); 4214 } 4215 4216 ctrlr->active_ns_count = 0; 4217 4218 spdk_bit_array_free(&ctrlr->free_io_qids); 4219 4220 free(ctrlr->ana_log_page); 4221 free(ctrlr->copied_ana_desc); 4222 ctrlr->ana_log_page = NULL; 4223 ctrlr->copied_ana_desc = NULL; 4224 ctrlr->ana_log_page_size = 0; 4225 4226 nvme_transport_ctrlr_destruct(ctrlr); 4227 4228 return rc; 4229 } 4230 4231 void 4232 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4233 { 4234 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4235 int rc; 4236 4237 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4238 4239 while (1) { 4240 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4241 if (rc != -EAGAIN) { 4242 break; 4243 } 4244 nvme_delay(1000); 4245 } 4246 } 4247 4248 int 4249 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4250 struct nvme_request *req) 4251 { 4252 return nvme_qpair_submit_request(ctrlr->adminq, req); 4253 } 4254 4255 static void 4256 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4257 { 4258 /* Do nothing */ 4259 } 4260 4261 /* 4262 * Check if we need to send a Keep Alive command. 4263 * Caller must hold ctrlr->ctrlr_lock. 4264 */ 4265 static int 4266 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4267 { 4268 uint64_t now; 4269 struct nvme_request *req; 4270 struct spdk_nvme_cmd *cmd; 4271 int rc = 0; 4272 4273 now = spdk_get_ticks(); 4274 if (now < ctrlr->next_keep_alive_tick) { 4275 return rc; 4276 } 4277 4278 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4279 if (req == NULL) { 4280 return rc; 4281 } 4282 4283 cmd = &req->cmd; 4284 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4285 4286 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4287 if (rc != 0) { 4288 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4289 rc = -ENXIO; 4290 } 4291 4292 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4293 return rc; 4294 } 4295 4296 int32_t 4297 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4298 { 4299 int32_t num_completions; 4300 int32_t rc; 4301 struct spdk_nvme_ctrlr_process *active_proc; 4302 4303 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4304 4305 if (ctrlr->keep_alive_interval_ticks) { 4306 rc = nvme_ctrlr_keep_alive(ctrlr); 4307 if (rc) { 4308 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4309 return rc; 4310 } 4311 } 4312 4313 rc = nvme_io_msg_process(ctrlr); 4314 if (rc < 0) { 4315 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4316 return rc; 4317 } 4318 num_completions = rc; 4319 4320 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4321 4322 /* Each process has an async list, complete the ones for this process object */ 4323 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4324 if (active_proc) { 4325 nvme_ctrlr_complete_queued_async_events(ctrlr); 4326 } 4327 4328 if (rc == -ENXIO && ctrlr->is_disconnecting) { 4329 nvme_ctrlr_disconnect_done(ctrlr); 4330 } 4331 4332 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4333 4334 if (rc < 0) { 4335 num_completions = rc; 4336 } else { 4337 num_completions += rc; 4338 } 4339 4340 return num_completions; 4341 } 4342 4343 const struct spdk_nvme_ctrlr_data * 4344 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4345 { 4346 return &ctrlr->cdata; 4347 } 4348 4349 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4350 { 4351 union spdk_nvme_csts_register csts; 4352 4353 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4354 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4355 } 4356 return csts; 4357 } 4358 4359 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4360 { 4361 union spdk_nvme_cc_register cc; 4362 4363 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4364 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4365 } 4366 return cc; 4367 } 4368 4369 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4370 { 4371 return ctrlr->cap; 4372 } 4373 4374 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4375 { 4376 return ctrlr->vs; 4377 } 4378 4379 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4380 { 4381 union spdk_nvme_cmbsz_register cmbsz; 4382 4383 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4384 cmbsz.raw = 0; 4385 } 4386 4387 return cmbsz; 4388 } 4389 4390 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4391 { 4392 union spdk_nvme_pmrcap_register pmrcap; 4393 4394 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4395 pmrcap.raw = 0; 4396 } 4397 4398 return pmrcap; 4399 } 4400 4401 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4402 { 4403 union spdk_nvme_bpinfo_register bpinfo; 4404 4405 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4406 bpinfo.raw = 0; 4407 } 4408 4409 return bpinfo; 4410 } 4411 4412 uint64_t 4413 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4414 { 4415 return ctrlr->pmr_size; 4416 } 4417 4418 uint32_t 4419 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4420 { 4421 return ctrlr->cdata.nn; 4422 } 4423 4424 bool 4425 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4426 { 4427 struct spdk_nvme_ns tmp, *ns; 4428 4429 tmp.id = nsid; 4430 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4431 4432 if (ns != NULL) { 4433 return ns->active; 4434 } 4435 4436 return false; 4437 } 4438 4439 uint32_t 4440 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4441 { 4442 struct spdk_nvme_ns *ns; 4443 4444 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4445 if (ns == NULL) { 4446 return 0; 4447 } 4448 4449 while (ns != NULL) { 4450 if (ns->active) { 4451 return ns->id; 4452 } 4453 4454 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4455 } 4456 4457 return 0; 4458 } 4459 4460 uint32_t 4461 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4462 { 4463 struct spdk_nvme_ns tmp, *ns; 4464 4465 tmp.id = prev_nsid; 4466 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4467 if (ns == NULL) { 4468 return 0; 4469 } 4470 4471 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4472 while (ns != NULL) { 4473 if (ns->active) { 4474 return ns->id; 4475 } 4476 4477 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4478 } 4479 4480 return 0; 4481 } 4482 4483 struct spdk_nvme_ns * 4484 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4485 { 4486 struct spdk_nvme_ns tmp; 4487 struct spdk_nvme_ns *ns; 4488 4489 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4490 return NULL; 4491 } 4492 4493 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4494 4495 tmp.id = nsid; 4496 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4497 4498 if (ns == NULL) { 4499 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4500 if (ns == NULL) { 4501 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4502 return NULL; 4503 } 4504 4505 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4506 ns->id = nsid; 4507 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4508 } 4509 4510 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4511 4512 return ns; 4513 } 4514 4515 struct spdk_pci_device * 4516 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4517 { 4518 if (ctrlr == NULL) { 4519 return NULL; 4520 } 4521 4522 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4523 return NULL; 4524 } 4525 4526 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4527 } 4528 4529 uint32_t 4530 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4531 { 4532 return ctrlr->max_xfer_size; 4533 } 4534 4535 void 4536 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4537 spdk_nvme_aer_cb aer_cb_fn, 4538 void *aer_cb_arg) 4539 { 4540 struct spdk_nvme_ctrlr_process *active_proc; 4541 4542 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4543 4544 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4545 if (active_proc) { 4546 active_proc->aer_cb_fn = aer_cb_fn; 4547 active_proc->aer_cb_arg = aer_cb_arg; 4548 } 4549 4550 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4551 } 4552 4553 void 4554 spdk_nvme_ctrlr_disable_read_changed_ns_list_log_page(struct spdk_nvme_ctrlr *ctrlr) 4555 { 4556 ctrlr->opts.disable_read_changed_ns_list_log_page = true; 4557 } 4558 4559 void 4560 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4561 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4562 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4563 { 4564 struct spdk_nvme_ctrlr_process *active_proc; 4565 4566 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4567 4568 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4569 if (active_proc) { 4570 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4571 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4572 active_proc->timeout_cb_fn = cb_fn; 4573 active_proc->timeout_cb_arg = cb_arg; 4574 } 4575 4576 ctrlr->timeout_enabled = true; 4577 4578 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4579 } 4580 4581 bool 4582 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4583 { 4584 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ 4585 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4586 return ctrlr->log_page_supported[log_page]; 4587 } 4588 4589 bool 4590 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4591 { 4592 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4593 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4594 return ctrlr->feature_supported[feature_code]; 4595 } 4596 4597 int 4598 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4599 struct spdk_nvme_ctrlr_list *payload) 4600 { 4601 struct nvme_completion_poll_status *status; 4602 struct spdk_nvme_ns *ns; 4603 int res; 4604 4605 if (nsid == 0) { 4606 return -EINVAL; 4607 } 4608 4609 status = calloc(1, sizeof(*status)); 4610 if (!status) { 4611 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4612 return -ENOMEM; 4613 } 4614 4615 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4616 nvme_completion_poll_cb, status); 4617 if (res) { 4618 free(status); 4619 return res; 4620 } 4621 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4622 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n"); 4623 if (!status->timed_out) { 4624 free(status); 4625 } 4626 return -ENXIO; 4627 } 4628 free(status); 4629 4630 res = nvme_ctrlr_identify_active_ns(ctrlr); 4631 if (res) { 4632 return res; 4633 } 4634 4635 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4636 if (ns == NULL) { 4637 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_get_ns failed!\n"); 4638 return -ENXIO; 4639 } 4640 4641 return nvme_ns_construct(ns, nsid, ctrlr); 4642 } 4643 4644 int 4645 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4646 struct spdk_nvme_ctrlr_list *payload) 4647 { 4648 struct nvme_completion_poll_status *status; 4649 int res; 4650 4651 if (nsid == 0) { 4652 return -EINVAL; 4653 } 4654 4655 status = calloc(1, sizeof(*status)); 4656 if (!status) { 4657 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4658 return -ENOMEM; 4659 } 4660 4661 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4662 nvme_completion_poll_cb, status); 4663 if (res) { 4664 free(status); 4665 return res; 4666 } 4667 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4668 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4669 if (!status->timed_out) { 4670 free(status); 4671 } 4672 return -ENXIO; 4673 } 4674 free(status); 4675 4676 return nvme_ctrlr_identify_active_ns(ctrlr); 4677 } 4678 4679 uint32_t 4680 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4681 { 4682 struct nvme_completion_poll_status *status; 4683 int res; 4684 uint32_t nsid; 4685 4686 status = calloc(1, sizeof(*status)); 4687 if (!status) { 4688 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4689 return 0; 4690 } 4691 4692 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4693 if (res) { 4694 free(status); 4695 return 0; 4696 } 4697 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4698 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4699 if (!status->timed_out) { 4700 free(status); 4701 } 4702 return 0; 4703 } 4704 4705 nsid = status->cpl.cdw0; 4706 free(status); 4707 4708 assert(nsid > 0); 4709 4710 /* Return the namespace ID that was created */ 4711 return nsid; 4712 } 4713 4714 int 4715 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4716 { 4717 struct nvme_completion_poll_status *status; 4718 int res; 4719 4720 if (nsid == 0) { 4721 return -EINVAL; 4722 } 4723 4724 status = calloc(1, sizeof(*status)); 4725 if (!status) { 4726 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4727 return -ENOMEM; 4728 } 4729 4730 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4731 if (res) { 4732 free(status); 4733 return res; 4734 } 4735 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4736 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4737 if (!status->timed_out) { 4738 free(status); 4739 } 4740 return -ENXIO; 4741 } 4742 free(status); 4743 4744 return nvme_ctrlr_identify_active_ns(ctrlr); 4745 } 4746 4747 int 4748 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4749 struct spdk_nvme_format *format) 4750 { 4751 struct nvme_completion_poll_status *status; 4752 int res; 4753 4754 status = calloc(1, sizeof(*status)); 4755 if (!status) { 4756 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4757 return -ENOMEM; 4758 } 4759 4760 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4761 status); 4762 if (res) { 4763 free(status); 4764 return res; 4765 } 4766 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4767 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4768 if (!status->timed_out) { 4769 free(status); 4770 } 4771 return -ENXIO; 4772 } 4773 free(status); 4774 4775 return spdk_nvme_ctrlr_reset(ctrlr); 4776 } 4777 4778 int 4779 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4780 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4781 { 4782 struct spdk_nvme_fw_commit fw_commit; 4783 struct nvme_completion_poll_status *status; 4784 int res; 4785 unsigned int size_remaining; 4786 unsigned int offset; 4787 unsigned int transfer; 4788 void *p; 4789 4790 if (!completion_status) { 4791 return -EINVAL; 4792 } 4793 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4794 if (size % 4) { 4795 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4796 return -1; 4797 } 4798 4799 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4800 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4801 */ 4802 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4803 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4804 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4805 return -1; 4806 } 4807 4808 status = calloc(1, sizeof(*status)); 4809 if (!status) { 4810 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4811 return -ENOMEM; 4812 } 4813 4814 /* Firmware download */ 4815 size_remaining = size; 4816 offset = 0; 4817 p = payload; 4818 4819 while (size_remaining > 0) { 4820 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4821 4822 memset(status, 0, sizeof(*status)); 4823 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4824 nvme_completion_poll_cb, 4825 status); 4826 if (res) { 4827 free(status); 4828 return res; 4829 } 4830 4831 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4832 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4833 if (!status->timed_out) { 4834 free(status); 4835 } 4836 return -ENXIO; 4837 } 4838 p += transfer; 4839 offset += transfer; 4840 size_remaining -= transfer; 4841 } 4842 4843 /* Firmware commit */ 4844 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4845 fw_commit.fs = slot; 4846 fw_commit.ca = commit_action; 4847 4848 memset(status, 0, sizeof(*status)); 4849 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4850 status); 4851 if (res) { 4852 free(status); 4853 return res; 4854 } 4855 4856 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4857 4858 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4859 4860 if (!status->timed_out) { 4861 free(status); 4862 } 4863 4864 if (res) { 4865 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4866 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4867 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 4868 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4869 NVME_CTRLR_NOTICELOG(ctrlr, 4870 "firmware activation requires conventional reset to be performed. !\n"); 4871 } else { 4872 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4873 } 4874 return -ENXIO; 4875 } 4876 } 4877 4878 return spdk_nvme_ctrlr_reset(ctrlr); 4879 } 4880 4881 int 4882 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4883 { 4884 int rc, size; 4885 union spdk_nvme_cmbsz_register cmbsz; 4886 4887 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4888 4889 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4890 return -ENOTSUP; 4891 } 4892 4893 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4894 4895 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4896 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4897 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4898 4899 if (rc < 0) { 4900 return rc; 4901 } 4902 4903 return size; 4904 } 4905 4906 void * 4907 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4908 { 4909 void *buf; 4910 4911 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4912 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4913 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4914 4915 return buf; 4916 } 4917 4918 void 4919 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4920 { 4921 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4922 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4923 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4924 } 4925 4926 int 4927 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4928 { 4929 int rc; 4930 4931 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4932 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4933 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4934 4935 return rc; 4936 } 4937 4938 int 4939 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4940 { 4941 int rc; 4942 4943 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4944 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4945 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4946 4947 return rc; 4948 } 4949 4950 void * 4951 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4952 { 4953 void *buf; 4954 4955 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4956 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4957 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4958 4959 return buf; 4960 } 4961 4962 int 4963 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4964 { 4965 int rc; 4966 4967 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4968 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4969 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4970 4971 return rc; 4972 } 4973 4974 int 4975 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4976 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4977 { 4978 union spdk_nvme_bprsel_register bprsel; 4979 union spdk_nvme_bpinfo_register bpinfo; 4980 uint64_t bpmbl, bpmb_size; 4981 4982 if (ctrlr->cap.bits.bps == 0) { 4983 return -ENOTSUP; 4984 } 4985 4986 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4987 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4988 return -EIO; 4989 } 4990 4991 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 4992 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 4993 return -EALREADY; 4994 } 4995 4996 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4997 4998 bpmb_size = bprsz * 4096; 4999 bpmbl = spdk_vtophys(payload, &bpmb_size); 5000 if (bpmbl == SPDK_VTOPHYS_ERROR) { 5001 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 5002 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5003 return -EFAULT; 5004 } 5005 5006 if (bpmb_size != bprsz * 4096) { 5007 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n"); 5008 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5009 return -EFAULT; 5010 } 5011 5012 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 5013 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 5014 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5015 return -EIO; 5016 } 5017 5018 bprsel.bits.bpid = bpid; 5019 bprsel.bits.bprof = bprof; 5020 bprsel.bits.bprsz = bprsz; 5021 5022 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 5023 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 5024 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5025 return -EIO; 5026 } 5027 5028 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5029 return 0; 5030 } 5031 5032 int 5033 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 5034 { 5035 int rc = 0; 5036 union spdk_nvme_bpinfo_register bpinfo; 5037 5038 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 5039 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 5040 return -EIO; 5041 } 5042 5043 switch (bpinfo.bits.brs) { 5044 case SPDK_NVME_BRS_NO_READ: 5045 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 5046 rc = -EINVAL; 5047 break; 5048 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5049 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5050 rc = -EAGAIN; 5051 break; 5052 case SPDK_NVME_BRS_READ_ERROR: 5053 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5054 rc = -EIO; 5055 break; 5056 case SPDK_NVME_BRS_READ_SUCCESS: 5057 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5058 break; 5059 default: 5060 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5061 rc = -EINVAL; 5062 } 5063 5064 return rc; 5065 } 5066 5067 static void 5068 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5069 { 5070 int res; 5071 struct spdk_nvme_ctrlr *ctrlr = arg; 5072 struct spdk_nvme_fw_commit fw_commit; 5073 struct spdk_nvme_cpl err_cpl = 5074 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5075 5076 if (spdk_nvme_cpl_is_error(cpl)) { 5077 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5078 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5079 return; 5080 } 5081 5082 if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) { 5083 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5084 ctrlr->fw_payload += ctrlr->fw_transfer_size; 5085 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5086 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5087 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5088 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5089 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5090 if (res) { 5091 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5092 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5093 return; 5094 } 5095 5096 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5097 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5098 } 5099 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5100 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5101 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5102 fw_commit.bpid = ctrlr->bpid; 5103 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5104 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5105 nvme_write_boot_partition_cb, ctrlr); 5106 if (res) { 5107 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5108 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5109 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5110 return; 5111 } 5112 5113 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5114 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5115 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5116 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5117 fw_commit.bpid = ctrlr->bpid; 5118 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5119 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5120 nvme_write_boot_partition_cb, ctrlr); 5121 if (res) { 5122 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5123 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5124 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5125 return; 5126 } 5127 5128 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5129 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5130 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5131 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5132 } else { 5133 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5134 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5135 return; 5136 } 5137 } 5138 5139 int 5140 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5141 void *payload, uint32_t size, uint32_t bpid, 5142 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5143 { 5144 int res; 5145 5146 if (ctrlr->cap.bits.bps == 0) { 5147 return -ENOTSUP; 5148 } 5149 5150 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5151 ctrlr->bpid = bpid; 5152 ctrlr->bp_write_cb_fn = cb_fn; 5153 ctrlr->bp_write_cb_arg = cb_arg; 5154 ctrlr->fw_offset = 0; 5155 ctrlr->fw_size_remaining = size; 5156 ctrlr->fw_payload = payload; 5157 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5158 5159 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5160 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5161 5162 return res; 5163 } 5164 5165 bool 5166 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5167 { 5168 assert(ctrlr); 5169 5170 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5171 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5172 } 5173 5174 bool 5175 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5176 { 5177 assert(ctrlr); 5178 5179 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5180 } 5181 5182 int 5183 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5184 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5185 { 5186 struct nvme_completion_poll_status *status; 5187 int res; 5188 5189 status = calloc(1, sizeof(*status)); 5190 if (!status) { 5191 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5192 return -ENOMEM; 5193 } 5194 5195 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5196 nvme_completion_poll_cb, status); 5197 if (res) { 5198 free(status); 5199 return res; 5200 } 5201 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5202 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5203 if (!status->timed_out) { 5204 free(status); 5205 } 5206 return -ENXIO; 5207 } 5208 free(status); 5209 5210 return 0; 5211 } 5212 5213 int 5214 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5215 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5216 { 5217 struct nvme_completion_poll_status *status; 5218 int res; 5219 5220 status = calloc(1, sizeof(*status)); 5221 if (!status) { 5222 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5223 return -ENOMEM; 5224 } 5225 5226 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5227 nvme_completion_poll_cb, 5228 status); 5229 if (res) { 5230 free(status); 5231 return res; 5232 } 5233 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5234 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5235 if (!status->timed_out) { 5236 free(status); 5237 } 5238 return -ENXIO; 5239 } 5240 5241 free(status); 5242 5243 return 0; 5244 } 5245 5246 uint64_t 5247 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5248 { 5249 return ctrlr->flags; 5250 } 5251 5252 const struct spdk_nvme_transport_id * 5253 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5254 { 5255 return &ctrlr->trid; 5256 } 5257 5258 int32_t 5259 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5260 { 5261 uint32_t qid; 5262 5263 assert(ctrlr->free_io_qids); 5264 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5265 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5266 if (qid > ctrlr->opts.num_io_queues) { 5267 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5268 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5269 return -1; 5270 } 5271 5272 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5273 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5274 return qid; 5275 } 5276 5277 void 5278 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5279 { 5280 assert(qid <= ctrlr->opts.num_io_queues); 5281 5282 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5283 5284 if (spdk_likely(ctrlr->free_io_qids)) { 5285 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5286 } 5287 5288 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5289 } 5290 5291 int 5292 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5293 struct spdk_memory_domain **domains, int array_size) 5294 { 5295 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5296 } 5297