1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2015 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "nvme_internal.h" 10 #include "nvme_io_msg.h" 11 12 #include "spdk/env.h" 13 #include "spdk/string.h" 14 #include "spdk/endian.h" 15 16 struct nvme_active_ns_ctx; 17 18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 19 struct nvme_async_event_request *aer); 20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx); 21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); 22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns); 23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); 24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr); 25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 26 uint64_t timeout_in_ms); 27 28 static int 29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2) 30 { 31 if (ns1->id < ns2->id) { 32 return -1; 33 } else if (ns1->id > ns2->id) { 34 return 1; 35 } else { 36 return 0; 37 } 38 } 39 40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp); 41 42 #define CTRLR_STRING(ctrlr) \ 43 ((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \ 44 ctrlr->trid.subnqn : ctrlr->trid.traddr) 45 46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \ 47 SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 48 49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \ 50 SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 51 52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \ 53 SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 54 55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \ 56 SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 57 58 #ifdef DEBUG 59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \ 60 SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 61 #else 62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0) 63 #endif 64 65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \ 66 nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \ 67 offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg) 68 69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \ 70 nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \ 71 offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg) 72 73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \ 74 nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg) 75 76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \ 77 nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg) 78 79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \ 80 nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg) 81 82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \ 83 nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg) 84 85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \ 86 nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg) 87 88 static int 89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 90 { 91 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 92 &cc->raw); 93 } 94 95 static int 96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 97 { 98 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 99 &csts->raw); 100 } 101 102 int 103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 104 { 105 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 106 &cap->raw); 107 } 108 109 int 110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 111 { 112 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 113 &vs->raw); 114 } 115 116 int 117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz) 118 { 119 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 120 &cmbsz->raw); 121 } 122 123 int 124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap) 125 { 126 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 127 &pmrcap->raw); 128 } 129 130 int 131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo) 132 { 133 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw), 134 &bpinfo->raw); 135 } 136 137 int 138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel) 139 { 140 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw), 141 bprsel->raw); 142 } 143 144 int 145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value) 146 { 147 return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl), 148 bpmbl_value); 149 } 150 151 static int 152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value) 153 { 154 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr), 155 nssr_value); 156 } 157 158 bool 159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr) 160 { 161 return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS && 162 ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS; 163 } 164 165 /* When the field in spdk_nvme_ctrlr_opts are changed and you change this function, please 166 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c 167 */ 168 void 169 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 170 { 171 char host_id_str[SPDK_UUID_STRING_LEN]; 172 173 assert(opts); 174 175 opts->opts_size = opts_size; 176 177 #define FIELD_OK(field) \ 178 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 179 180 #define SET_FIELD(field, value) \ 181 if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \ 182 opts->field = value; \ 183 } \ 184 185 SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES); 186 SET_FIELD(use_cmb_sqs, false); 187 SET_FIELD(no_shn_notification, false); 188 SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR); 189 SET_FIELD(arbitration_burst, 0); 190 SET_FIELD(low_priority_weight, 0); 191 SET_FIELD(medium_priority_weight, 0); 192 SET_FIELD(high_priority_weight, 0); 193 SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS); 194 SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT); 195 SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE); 196 197 if (nvme_driver_init() == 0) { 198 if (FIELD_OK(hostnqn)) { 199 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 200 &g_spdk_nvme_driver->default_extended_host_id); 201 snprintf(opts->hostnqn, sizeof(opts->hostnqn), 202 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str); 203 } 204 205 if (FIELD_OK(extended_host_id)) { 206 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 207 sizeof(opts->extended_host_id)); 208 } 209 210 } 211 212 SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS); 213 214 if (FIELD_OK(src_addr)) { 215 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 216 } 217 218 if (FIELD_OK(src_svcid)) { 219 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 220 } 221 222 if (FIELD_OK(host_id)) { 223 memset(opts->host_id, 0, sizeof(opts->host_id)); 224 } 225 226 SET_FIELD(command_set, CHAR_BIT); 227 SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000); 228 SET_FIELD(header_digest, false); 229 SET_FIELD(data_digest, false); 230 SET_FIELD(disable_error_logging, false); 231 SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT); 232 SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE); 233 SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT); 234 SET_FIELD(disable_read_ana_log_page, false); 235 SET_FIELD(disable_read_changed_ns_list_log_page, false); 236 237 if (FIELD_OK(psk)) { 238 memset(opts->psk, 0, sizeof(opts->psk)); 239 } 240 241 #undef FIELD_OK 242 #undef SET_FIELD 243 } 244 245 const struct spdk_nvme_ctrlr_opts * 246 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) 247 { 248 return &ctrlr->opts; 249 } 250 251 /** 252 * This function will be called when the process allocates the IO qpair. 253 * Note: the ctrlr_lock must be held when calling this function. 254 */ 255 static void 256 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 257 { 258 struct spdk_nvme_ctrlr_process *active_proc; 259 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 260 261 active_proc = nvme_ctrlr_get_current_process(ctrlr); 262 if (active_proc) { 263 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 264 qpair->active_proc = active_proc; 265 } 266 } 267 268 /** 269 * This function will be called when the process frees the IO qpair. 270 * Note: the ctrlr_lock must be held when calling this function. 271 */ 272 static void 273 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 274 { 275 struct spdk_nvme_ctrlr_process *active_proc; 276 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 277 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 278 279 active_proc = nvme_ctrlr_get_current_process(ctrlr); 280 if (!active_proc) { 281 return; 282 } 283 284 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 285 per_process_tailq, tmp_qpair) { 286 if (active_qpair == qpair) { 287 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 288 active_qpair, per_process_tailq); 289 290 break; 291 } 292 } 293 } 294 295 void 296 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 297 struct spdk_nvme_io_qpair_opts *opts, 298 size_t opts_size) 299 { 300 assert(ctrlr); 301 302 assert(opts); 303 304 memset(opts, 0, opts_size); 305 306 #define FIELD_OK(field) \ 307 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 308 309 if (FIELD_OK(qprio)) { 310 opts->qprio = SPDK_NVME_QPRIO_URGENT; 311 } 312 313 if (FIELD_OK(io_queue_size)) { 314 opts->io_queue_size = ctrlr->opts.io_queue_size; 315 } 316 317 if (FIELD_OK(io_queue_requests)) { 318 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 319 } 320 321 if (FIELD_OK(delay_cmd_submit)) { 322 opts->delay_cmd_submit = false; 323 } 324 325 if (FIELD_OK(sq.vaddr)) { 326 opts->sq.vaddr = NULL; 327 } 328 329 if (FIELD_OK(sq.paddr)) { 330 opts->sq.paddr = 0; 331 } 332 333 if (FIELD_OK(sq.buffer_size)) { 334 opts->sq.buffer_size = 0; 335 } 336 337 if (FIELD_OK(cq.vaddr)) { 338 opts->cq.vaddr = NULL; 339 } 340 341 if (FIELD_OK(cq.paddr)) { 342 opts->cq.paddr = 0; 343 } 344 345 if (FIELD_OK(cq.buffer_size)) { 346 opts->cq.buffer_size = 0; 347 } 348 349 if (FIELD_OK(create_only)) { 350 opts->create_only = false; 351 } 352 353 if (FIELD_OK(async_mode)) { 354 opts->async_mode = false; 355 } 356 357 #undef FIELD_OK 358 } 359 360 static struct spdk_nvme_qpair * 361 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 362 const struct spdk_nvme_io_qpair_opts *opts) 363 { 364 int32_t qid; 365 struct spdk_nvme_qpair *qpair; 366 union spdk_nvme_cc_register cc; 367 368 if (!ctrlr) { 369 return NULL; 370 } 371 372 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 373 cc.raw = ctrlr->process_init_cc.raw; 374 375 if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) { 376 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 377 return NULL; 378 } 379 380 /* 381 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 382 * default round robin arbitration method. 383 */ 384 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) { 385 NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n"); 386 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 387 return NULL; 388 } 389 390 qid = spdk_nvme_ctrlr_alloc_qid(ctrlr); 391 if (qid < 0) { 392 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 393 return NULL; 394 } 395 396 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts); 397 if (qpair == NULL) { 398 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n"); 399 spdk_nvme_ctrlr_free_qid(ctrlr, qid); 400 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 401 return NULL; 402 } 403 404 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 405 406 nvme_ctrlr_proc_add_io_qpair(qpair); 407 408 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 409 410 return qpair; 411 } 412 413 int 414 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 415 { 416 int rc; 417 418 if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) { 419 return -EISCONN; 420 } 421 422 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 423 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 424 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 425 426 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 427 spdk_delay_us(100); 428 } 429 430 return rc; 431 } 432 433 void 434 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair) 435 { 436 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 437 438 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 439 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 440 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 441 } 442 443 struct spdk_nvme_qpair * 444 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 445 const struct spdk_nvme_io_qpair_opts *user_opts, 446 size_t opts_size) 447 { 448 449 struct spdk_nvme_qpair *qpair; 450 struct spdk_nvme_io_qpair_opts opts; 451 int rc; 452 453 if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) { 454 /* When controller is resetting or initializing, free_io_qids is deleted or not created yet. 455 * We can't create IO qpair in that case */ 456 return NULL; 457 } 458 459 /* 460 * Get the default options, then overwrite them with the user-provided options 461 * up to opts_size. 462 * 463 * This allows for extensions of the opts structure without breaking 464 * ABI compatibility. 465 */ 466 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 467 if (user_opts) { 468 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 469 470 /* If user passes buffers, make sure they're big enough for the requested queue size */ 471 if (opts.sq.vaddr) { 472 if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) { 473 NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n", 474 opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))); 475 return NULL; 476 } 477 } 478 if (opts.cq.vaddr) { 479 if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) { 480 NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n", 481 opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))); 482 return NULL; 483 } 484 } 485 } 486 487 qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts); 488 489 if (qpair == NULL || opts.create_only == true) { 490 return qpair; 491 } 492 493 rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair); 494 if (rc != 0) { 495 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n"); 496 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 497 nvme_ctrlr_proc_remove_io_qpair(qpair); 498 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 499 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 500 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 501 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 502 return NULL; 503 } 504 505 return qpair; 506 } 507 508 int 509 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) 510 { 511 struct spdk_nvme_ctrlr *ctrlr; 512 enum nvme_qpair_state qpair_state; 513 int rc; 514 515 assert(qpair != NULL); 516 assert(nvme_qpair_is_admin_queue(qpair) == false); 517 assert(qpair->ctrlr != NULL); 518 519 ctrlr = qpair->ctrlr; 520 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 521 qpair_state = nvme_qpair_get_state(qpair); 522 523 if (ctrlr->is_removed) { 524 rc = -ENODEV; 525 goto out; 526 } 527 528 if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) { 529 rc = -EAGAIN; 530 goto out; 531 } 532 533 if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) { 534 rc = -ENXIO; 535 goto out; 536 } 537 538 if (qpair_state != NVME_QPAIR_DISCONNECTED) { 539 rc = 0; 540 goto out; 541 } 542 543 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 544 if (rc) { 545 rc = -EAGAIN; 546 goto out; 547 } 548 549 out: 550 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 551 return rc; 552 } 553 554 spdk_nvme_qp_failure_reason 555 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr) 556 { 557 return ctrlr->adminq->transport_failure_reason; 558 } 559 560 /* 561 * This internal function will attempt to take the controller 562 * lock before calling disconnect on a controller qpair. 563 * Functions already holding the controller lock should 564 * call nvme_transport_ctrlr_disconnect_qpair directly. 565 */ 566 void 567 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) 568 { 569 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 570 571 assert(ctrlr != NULL); 572 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 573 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 574 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 575 } 576 577 int 578 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 579 { 580 struct spdk_nvme_ctrlr *ctrlr; 581 582 if (qpair == NULL) { 583 return 0; 584 } 585 586 ctrlr = qpair->ctrlr; 587 588 if (qpair->in_completion_context) { 589 /* 590 * There are many cases where it is convenient to delete an io qpair in the context 591 * of that qpair's completion routine. To handle this properly, set a flag here 592 * so that the completion routine will perform an actual delete after the context 593 * unwinds. 594 */ 595 qpair->delete_after_completion_context = 1; 596 return 0; 597 } 598 599 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 600 601 if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) { 602 spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair); 603 } 604 605 /* Do not retry. */ 606 nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING); 607 608 /* In the multi-process case, a process may call this function on a foreign 609 * I/O qpair (i.e. one that this process did not create) when that qpairs process 610 * exits unexpectedly. In that case, we must not try to abort any reqs associated 611 * with that qpair, since the callbacks will also be foreign to this process. 612 */ 613 if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 614 nvme_qpair_abort_all_queued_reqs(qpair); 615 } 616 617 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 618 619 nvme_ctrlr_proc_remove_io_qpair(qpair); 620 621 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 622 spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id); 623 624 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 625 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 626 return 0; 627 } 628 629 static void 630 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 631 struct spdk_nvme_intel_log_page_directory *log_page_directory) 632 { 633 if (log_page_directory == NULL) { 634 return; 635 } 636 637 assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL); 638 639 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 640 641 if (log_page_directory->read_latency_log_len || 642 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 643 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 644 } 645 if (log_page_directory->write_latency_log_len || 646 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 647 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 648 } 649 if (log_page_directory->temperature_statistics_log_len) { 650 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 651 } 652 if (log_page_directory->smart_log_len) { 653 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 654 } 655 if (log_page_directory->marketing_description_log_len) { 656 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 657 } 658 } 659 660 struct intel_log_pages_ctx { 661 struct spdk_nvme_intel_log_page_directory log_page_directory; 662 struct spdk_nvme_ctrlr *ctrlr; 663 }; 664 665 static void 666 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl) 667 { 668 struct intel_log_pages_ctx *ctx = arg; 669 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 670 671 if (!spdk_nvme_cpl_is_error(cpl)) { 672 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory); 673 } 674 675 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 676 ctrlr->opts.admin_timeout_ms); 677 free(ctx); 678 } 679 680 static int 681 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 682 { 683 int rc = 0; 684 struct intel_log_pages_ctx *ctx; 685 686 ctx = calloc(1, sizeof(*ctx)); 687 if (!ctx) { 688 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 689 ctrlr->opts.admin_timeout_ms); 690 return 0; 691 } 692 693 ctx->ctrlr = ctrlr; 694 695 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, 696 SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory, 697 sizeof(struct spdk_nvme_intel_log_page_directory), 698 0, nvme_ctrlr_set_intel_support_log_pages_done, ctx); 699 if (rc != 0) { 700 free(ctx); 701 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 702 ctrlr->opts.admin_timeout_ms); 703 return 0; 704 } 705 706 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES, 707 ctrlr->opts.admin_timeout_ms); 708 709 return 0; 710 } 711 712 static int 713 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 714 { 715 uint32_t ana_log_page_size; 716 717 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid * 718 sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count * 719 sizeof(uint32_t); 720 721 /* Number of active namespaces may have changed. 722 * Check if ANA log page fits into existing buffer. 723 */ 724 if (ana_log_page_size > ctrlr->ana_log_page_size) { 725 void *new_buffer; 726 727 if (ctrlr->ana_log_page) { 728 new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size); 729 } else { 730 new_buffer = calloc(1, ana_log_page_size); 731 } 732 733 if (!new_buffer) { 734 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n", 735 ana_log_page_size); 736 return -ENXIO; 737 } 738 739 ctrlr->ana_log_page = new_buffer; 740 if (ctrlr->copied_ana_desc) { 741 new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size); 742 } else { 743 new_buffer = calloc(1, ana_log_page_size); 744 } 745 746 if (!new_buffer) { 747 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n", 748 ana_log_page_size); 749 return -ENOMEM; 750 } 751 752 ctrlr->copied_ana_desc = new_buffer; 753 ctrlr->ana_log_page_size = ana_log_page_size; 754 } 755 756 return 0; 757 } 758 759 static int 760 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 761 { 762 struct nvme_completion_poll_status *status; 763 int rc; 764 765 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 766 if (rc != 0) { 767 return rc; 768 } 769 770 status = calloc(1, sizeof(*status)); 771 if (status == NULL) { 772 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 773 return -ENOMEM; 774 } 775 776 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 777 SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page, 778 ctrlr->ana_log_page_size, 0, 779 nvme_completion_poll_cb, status); 780 if (rc != 0) { 781 free(status); 782 return rc; 783 } 784 785 if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock, 786 ctrlr->opts.admin_timeout_ms * 1000)) { 787 if (!status->timed_out) { 788 free(status); 789 } 790 return -EIO; 791 } 792 793 free(status); 794 return 0; 795 } 796 797 static int 798 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 799 void *cb_arg) 800 { 801 struct spdk_nvme_ctrlr *ctrlr = cb_arg; 802 struct spdk_nvme_ns *ns; 803 uint32_t i, nsid; 804 805 for (i = 0; i < desc->num_of_nsid; i++) { 806 nsid = desc->nsid[i]; 807 if (nsid == 0 || nsid > ctrlr->cdata.nn) { 808 continue; 809 } 810 811 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 812 assert(ns != NULL); 813 814 ns->ana_group_id = desc->ana_group_id; 815 ns->ana_state = desc->ana_state; 816 } 817 818 return 0; 819 } 820 821 int 822 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, 823 spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg) 824 { 825 struct spdk_nvme_ana_group_descriptor *copied_desc; 826 uint8_t *orig_desc; 827 uint32_t i, desc_size, copy_len; 828 int rc = 0; 829 830 if (ctrlr->ana_log_page == NULL) { 831 return -EINVAL; 832 } 833 834 copied_desc = ctrlr->copied_ana_desc; 835 836 orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page); 837 copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page); 838 839 for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) { 840 memcpy(copied_desc, orig_desc, copy_len); 841 842 rc = cb_fn(copied_desc, cb_arg); 843 if (rc != 0) { 844 break; 845 } 846 847 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) + 848 copied_desc->num_of_nsid * sizeof(uint32_t); 849 orig_desc += desc_size; 850 copy_len -= desc_size; 851 } 852 853 return rc; 854 } 855 856 static int 857 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 858 { 859 int rc = 0; 860 861 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 862 /* Mandatory pages */ 863 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 864 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 865 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 866 if (ctrlr->cdata.lpa.celp) { 867 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 868 } 869 870 if (ctrlr->cdata.cmic.ana_reporting) { 871 ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true; 872 if (!ctrlr->opts.disable_read_ana_log_page) { 873 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 874 if (rc == 0) { 875 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 876 ctrlr); 877 } 878 } 879 } 880 881 if (ctrlr->cdata.ctratt.fdps) { 882 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_CONFIGURATIONS] = true; 883 ctrlr->log_page_supported[SPDK_NVME_LOG_RECLAIM_UNIT_HANDLE_USAGE] = true; 884 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_STATISTICS] = true; 885 ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_EVENTS] = true; 886 } 887 888 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && 889 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE && 890 !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { 891 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES, 892 ctrlr->opts.admin_timeout_ms); 893 894 } else { 895 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 896 ctrlr->opts.admin_timeout_ms); 897 898 } 899 900 return rc; 901 } 902 903 static void 904 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 905 { 906 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 907 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 908 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 909 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 910 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 911 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 912 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 913 } 914 915 static void 916 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr) 917 { 918 uint32_t cdw11; 919 struct nvme_completion_poll_status *status; 920 921 if (ctrlr->opts.arbitration_burst == 0) { 922 return; 923 } 924 925 if (ctrlr->opts.arbitration_burst > 7) { 926 NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values is from 0-7\n"); 927 return; 928 } 929 930 status = calloc(1, sizeof(*status)); 931 if (!status) { 932 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 933 return; 934 } 935 936 cdw11 = ctrlr->opts.arbitration_burst; 937 938 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) { 939 cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8; 940 cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16; 941 cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24; 942 } 943 944 if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION, 945 cdw11, 0, NULL, 0, 946 nvme_completion_poll_cb, status) < 0) { 947 NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n"); 948 free(status); 949 return; 950 } 951 952 if (nvme_wait_for_completion_timeout(ctrlr->adminq, status, 953 ctrlr->opts.admin_timeout_ms * 1000)) { 954 NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n"); 955 } 956 957 if (!status->timed_out) { 958 free(status); 959 } 960 } 961 962 static void 963 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 964 { 965 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 966 /* Mandatory features */ 967 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 968 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 969 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 970 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 971 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 972 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 973 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 974 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 975 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 976 /* Optional features */ 977 if (ctrlr->cdata.vwc.present) { 978 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 979 } 980 if (ctrlr->cdata.apsta.supported) { 981 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 982 } 983 if (ctrlr->cdata.hmpre) { 984 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 985 } 986 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 987 nvme_ctrlr_set_intel_supported_features(ctrlr); 988 } 989 990 nvme_ctrlr_set_arbitration_feature(ctrlr); 991 } 992 993 bool 994 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr) 995 { 996 return ctrlr->is_failed; 997 } 998 999 void 1000 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 1001 { 1002 /* 1003 * Set the flag here and leave the work failure of qpairs to 1004 * spdk_nvme_qpair_process_completions(). 1005 */ 1006 if (hot_remove) { 1007 ctrlr->is_removed = true; 1008 } 1009 1010 if (ctrlr->is_failed) { 1011 NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n"); 1012 return; 1013 } 1014 1015 if (ctrlr->is_disconnecting) { 1016 NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n"); 1017 return; 1018 } 1019 1020 ctrlr->is_failed = true; 1021 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1022 NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n"); 1023 } 1024 1025 /** 1026 * This public API function will try to take the controller lock. 1027 * Any private functions being called from a thread already holding 1028 * the ctrlr lock should call nvme_ctrlr_fail directly. 1029 */ 1030 void 1031 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) 1032 { 1033 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1034 nvme_ctrlr_fail(ctrlr, false); 1035 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1036 } 1037 1038 static void 1039 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1040 { 1041 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1042 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1043 1044 if (spdk_nvme_cpl_is_error(cpl)) { 1045 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1046 ctx->shutdown_complete = true; 1047 return; 1048 } 1049 1050 if (ctrlr->opts.no_shn_notification) { 1051 ctx->shutdown_complete = true; 1052 return; 1053 } 1054 1055 /* 1056 * The NVMe specification defines RTD3E to be the time between 1057 * setting SHN = 1 until the controller will set SHST = 10b. 1058 * If the device doesn't report RTD3 entry latency, or if it 1059 * reports RTD3 entry latency less than 10 seconds, pick 1060 * 10 seconds as a reasonable amount of time to 1061 * wait before proceeding. 1062 */ 1063 NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 1064 ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); 1065 ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); 1066 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms); 1067 1068 ctx->shutdown_start_tsc = spdk_get_ticks(); 1069 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1070 } 1071 1072 static void 1073 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1074 { 1075 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1076 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1077 union spdk_nvme_cc_register cc; 1078 int rc; 1079 1080 if (spdk_nvme_cpl_is_error(cpl)) { 1081 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1082 ctx->shutdown_complete = true; 1083 return; 1084 } 1085 1086 assert(value <= UINT32_MAX); 1087 cc.raw = (uint32_t)value; 1088 1089 if (ctrlr->opts.no_shn_notification) { 1090 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 1091 if (cc.bits.en == 0) { 1092 ctx->shutdown_complete = true; 1093 return; 1094 } 1095 1096 cc.bits.en = 0; 1097 } else { 1098 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 1099 } 1100 1101 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx); 1102 if (rc != 0) { 1103 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1104 ctx->shutdown_complete = true; 1105 } 1106 } 1107 1108 static void 1109 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, 1110 struct nvme_ctrlr_detach_ctx *ctx) 1111 { 1112 int rc; 1113 1114 if (ctrlr->is_removed) { 1115 ctx->shutdown_complete = true; 1116 return; 1117 } 1118 1119 ctx->state = NVME_CTRLR_DETACH_SET_CC; 1120 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx); 1121 if (rc != 0) { 1122 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1123 ctx->shutdown_complete = true; 1124 } 1125 } 1126 1127 static void 1128 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1129 { 1130 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1131 1132 if (spdk_nvme_cpl_is_error(cpl)) { 1133 NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n"); 1134 ctx->shutdown_complete = true; 1135 return; 1136 } 1137 1138 assert(value <= UINT32_MAX); 1139 ctx->csts.raw = (uint32_t)value; 1140 ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE; 1141 } 1142 1143 static int 1144 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, 1145 struct nvme_ctrlr_detach_ctx *ctx) 1146 { 1147 union spdk_nvme_csts_register csts; 1148 uint32_t ms_waited; 1149 1150 switch (ctx->state) { 1151 case NVME_CTRLR_DETACH_SET_CC: 1152 case NVME_CTRLR_DETACH_GET_CSTS: 1153 /* We're still waiting for the register operation to complete */ 1154 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1155 return -EAGAIN; 1156 1157 case NVME_CTRLR_DETACH_CHECK_CSTS: 1158 ctx->state = NVME_CTRLR_DETACH_GET_CSTS; 1159 if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) { 1160 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 1161 return -EIO; 1162 } 1163 return -EAGAIN; 1164 1165 case NVME_CTRLR_DETACH_GET_CSTS_DONE: 1166 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1167 break; 1168 1169 default: 1170 assert(0 && "Should never happen"); 1171 return -EINVAL; 1172 } 1173 1174 ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); 1175 csts.raw = ctx->csts.raw; 1176 1177 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 1178 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited); 1179 return 0; 1180 } 1181 1182 if (ms_waited < ctx->shutdown_timeout_ms) { 1183 return -EAGAIN; 1184 } 1185 1186 NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n", 1187 ctx->shutdown_timeout_ms); 1188 if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { 1189 NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); 1190 } 1191 1192 return 0; 1193 } 1194 1195 static inline uint64_t 1196 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr) 1197 { 1198 return ctrlr->cap.bits.to * 500; 1199 } 1200 1201 static void 1202 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1203 { 1204 struct spdk_nvme_ctrlr *ctrlr = ctx; 1205 1206 if (spdk_nvme_cpl_is_error(cpl)) { 1207 NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n"); 1208 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1209 return; 1210 } 1211 1212 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 1213 nvme_ctrlr_get_ready_timeout(ctrlr)); 1214 } 1215 1216 static int 1217 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 1218 { 1219 union spdk_nvme_cc_register cc; 1220 int rc; 1221 1222 rc = nvme_transport_ctrlr_enable(ctrlr); 1223 if (rc != 0) { 1224 NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n"); 1225 return rc; 1226 } 1227 1228 cc.raw = ctrlr->process_init_cc.raw; 1229 if (cc.bits.en != 0) { 1230 NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n"); 1231 return -EINVAL; 1232 } 1233 1234 cc.bits.en = 1; 1235 cc.bits.css = 0; 1236 cc.bits.shn = 0; 1237 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 1238 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 1239 1240 /* Page size is 2 ^ (12 + mps). */ 1241 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 1242 1243 /* 1244 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS. 1245 * A controller that does not have any bit set in CAP.CSS is not spec compliant. 1246 * Try to support such a controller regardless. 1247 */ 1248 if (ctrlr->cap.bits.css == 0) { 1249 NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n"); 1250 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 1251 } 1252 1253 /* 1254 * If the user did not explicitly request a command set, or supplied a value larger than 1255 * what can be saved in CC.CSS, use the most reasonable default. 1256 */ 1257 if (ctrlr->opts.command_set >= CHAR_BIT) { 1258 if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) { 1259 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS; 1260 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) { 1261 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1262 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) { 1263 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO; 1264 } else { 1265 /* Invalid supported bits detected, falling back to NVM. */ 1266 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1267 } 1268 } 1269 1270 /* Verify that the selected command set is supported by the controller. */ 1271 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 1272 NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n", 1273 ctrlr->opts.command_set, ctrlr->cap.bits.css); 1274 NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n"); 1275 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1276 } 1277 1278 cc.bits.css = ctrlr->opts.command_set; 1279 1280 switch (ctrlr->opts.arb_mechanism) { 1281 case SPDK_NVME_CC_AMS_RR: 1282 break; 1283 case SPDK_NVME_CC_AMS_WRR: 1284 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 1285 break; 1286 } 1287 return -EINVAL; 1288 case SPDK_NVME_CC_AMS_VS: 1289 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 1290 break; 1291 } 1292 return -EINVAL; 1293 default: 1294 return -EINVAL; 1295 } 1296 1297 cc.bits.ams = ctrlr->opts.arb_mechanism; 1298 ctrlr->process_init_cc.raw = cc.raw; 1299 1300 if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) { 1301 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 1302 return -EIO; 1303 } 1304 1305 return 0; 1306 } 1307 1308 static const char * 1309 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 1310 { 1311 switch (state) { 1312 case NVME_CTRLR_STATE_INIT_DELAY: 1313 return "delay init"; 1314 case NVME_CTRLR_STATE_CONNECT_ADMINQ: 1315 return "connect adminq"; 1316 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 1317 return "wait for connect adminq"; 1318 case NVME_CTRLR_STATE_READ_VS: 1319 return "read vs"; 1320 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 1321 return "read vs wait for vs"; 1322 case NVME_CTRLR_STATE_READ_CAP: 1323 return "read cap"; 1324 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 1325 return "read cap wait for cap"; 1326 case NVME_CTRLR_STATE_CHECK_EN: 1327 return "check en"; 1328 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 1329 return "check en wait for cc"; 1330 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1331 return "disable and wait for CSTS.RDY = 1"; 1332 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1333 return "disable and wait for CSTS.RDY = 1 reg"; 1334 case NVME_CTRLR_STATE_SET_EN_0: 1335 return "set CC.EN = 0"; 1336 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 1337 return "set CC.EN = 0 wait for cc"; 1338 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1339 return "disable and wait for CSTS.RDY = 0"; 1340 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 1341 return "disable and wait for CSTS.RDY = 0 reg"; 1342 case NVME_CTRLR_STATE_DISABLED: 1343 return "controller is disabled"; 1344 case NVME_CTRLR_STATE_ENABLE: 1345 return "enable controller by writing CC.EN = 1"; 1346 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 1347 return "enable controller by writing CC.EN = 1 reg"; 1348 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1349 return "wait for CSTS.RDY = 1"; 1350 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1351 return "wait for CSTS.RDY = 1 reg"; 1352 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 1353 return "reset admin queue"; 1354 case NVME_CTRLR_STATE_IDENTIFY: 1355 return "identify controller"; 1356 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 1357 return "wait for identify controller"; 1358 case NVME_CTRLR_STATE_CONFIGURE_AER: 1359 return "configure AER"; 1360 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 1361 return "wait for configure aer"; 1362 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 1363 return "set keep alive timeout"; 1364 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 1365 return "wait for set keep alive timeout"; 1366 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 1367 return "identify controller iocs specific"; 1368 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 1369 return "wait for identify controller iocs specific"; 1370 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 1371 return "get zns cmd and effects log page"; 1372 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 1373 return "wait for get zns cmd and effects log page"; 1374 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 1375 return "set number of queues"; 1376 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 1377 return "wait for set number of queues"; 1378 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 1379 return "identify active ns"; 1380 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 1381 return "wait for identify active ns"; 1382 case NVME_CTRLR_STATE_IDENTIFY_NS: 1383 return "identify ns"; 1384 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 1385 return "wait for identify ns"; 1386 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 1387 return "identify namespace id descriptors"; 1388 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 1389 return "wait for identify namespace id descriptors"; 1390 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 1391 return "identify ns iocs specific"; 1392 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 1393 return "wait for identify ns iocs specific"; 1394 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 1395 return "set supported log pages"; 1396 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 1397 return "set supported INTEL log pages"; 1398 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 1399 return "wait for supported INTEL log pages"; 1400 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 1401 return "set supported features"; 1402 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 1403 return "set doorbell buffer config"; 1404 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 1405 return "wait for doorbell buffer config"; 1406 case NVME_CTRLR_STATE_SET_HOST_ID: 1407 return "set host ID"; 1408 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 1409 return "wait for set host ID"; 1410 case NVME_CTRLR_STATE_TRANSPORT_READY: 1411 return "transport ready"; 1412 case NVME_CTRLR_STATE_READY: 1413 return "ready"; 1414 case NVME_CTRLR_STATE_ERROR: 1415 return "error"; 1416 } 1417 return "unknown"; 1418 }; 1419 1420 static void 1421 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1422 uint64_t timeout_in_ms, bool quiet) 1423 { 1424 uint64_t ticks_per_ms, timeout_in_ticks, now_ticks; 1425 1426 ctrlr->state = state; 1427 if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) { 1428 if (!quiet) { 1429 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n", 1430 nvme_ctrlr_state_string(ctrlr->state)); 1431 } 1432 return; 1433 } 1434 1435 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 1436 goto inf; 1437 } 1438 1439 ticks_per_ms = spdk_get_ticks_hz() / 1000; 1440 if (timeout_in_ms > UINT64_MAX / ticks_per_ms) { 1441 NVME_CTRLR_ERRLOG(ctrlr, 1442 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1443 goto inf; 1444 } 1445 1446 now_ticks = spdk_get_ticks(); 1447 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 1448 if (timeout_in_ticks > UINT64_MAX - now_ticks) { 1449 NVME_CTRLR_ERRLOG(ctrlr, 1450 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1451 goto inf; 1452 } 1453 1454 ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; 1455 if (!quiet) { 1456 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n", 1457 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 1458 } 1459 return; 1460 inf: 1461 if (!quiet) { 1462 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n", 1463 nvme_ctrlr_state_string(ctrlr->state)); 1464 } 1465 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 1466 } 1467 1468 static void 1469 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1470 uint64_t timeout_in_ms) 1471 { 1472 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false); 1473 } 1474 1475 static void 1476 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1477 uint64_t timeout_in_ms) 1478 { 1479 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true); 1480 } 1481 1482 static void 1483 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1484 { 1485 spdk_free(ctrlr->cdata_zns); 1486 ctrlr->cdata_zns = NULL; 1487 } 1488 1489 static void 1490 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1491 { 1492 nvme_ctrlr_free_zns_specific_data(ctrlr); 1493 } 1494 1495 static void 1496 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 1497 { 1498 if (ctrlr->shadow_doorbell) { 1499 spdk_free(ctrlr->shadow_doorbell); 1500 ctrlr->shadow_doorbell = NULL; 1501 } 1502 1503 if (ctrlr->eventidx) { 1504 spdk_free(ctrlr->eventidx); 1505 ctrlr->eventidx = NULL; 1506 } 1507 } 1508 1509 static void 1510 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) 1511 { 1512 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1513 1514 if (spdk_nvme_cpl_is_error(cpl)) { 1515 NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n"); 1516 } else { 1517 NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n"); 1518 } 1519 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1520 ctrlr->opts.admin_timeout_ms); 1521 } 1522 1523 static int 1524 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 1525 { 1526 int rc = 0; 1527 uint64_t prp1, prp2, len; 1528 1529 if (!ctrlr->cdata.oacs.doorbell_buffer_config) { 1530 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1531 ctrlr->opts.admin_timeout_ms); 1532 return 0; 1533 } 1534 1535 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 1536 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1537 ctrlr->opts.admin_timeout_ms); 1538 return 0; 1539 } 1540 1541 /* only 1 page size for doorbell buffer */ 1542 ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1543 NULL, SPDK_ENV_LCORE_ID_ANY, 1544 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1545 if (ctrlr->shadow_doorbell == NULL) { 1546 rc = -ENOMEM; 1547 goto error; 1548 } 1549 1550 len = ctrlr->page_size; 1551 prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len); 1552 if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1553 rc = -EFAULT; 1554 goto error; 1555 } 1556 1557 ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1558 NULL, SPDK_ENV_LCORE_ID_ANY, 1559 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1560 if (ctrlr->eventidx == NULL) { 1561 rc = -ENOMEM; 1562 goto error; 1563 } 1564 1565 len = ctrlr->page_size; 1566 prp2 = spdk_vtophys(ctrlr->eventidx, &len); 1567 if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1568 rc = -EFAULT; 1569 goto error; 1570 } 1571 1572 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, 1573 ctrlr->opts.admin_timeout_ms); 1574 1575 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 1576 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); 1577 if (rc != 0) { 1578 goto error; 1579 } 1580 1581 return 0; 1582 1583 error: 1584 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1585 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1586 return rc; 1587 } 1588 1589 void 1590 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) 1591 { 1592 struct nvme_request *req, *tmp; 1593 struct spdk_nvme_cpl cpl = {}; 1594 1595 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1596 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1597 1598 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 1599 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 1600 ctrlr->outstanding_aborts++; 1601 1602 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1603 nvme_free_request(req); 1604 } 1605 } 1606 1607 static int 1608 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1609 { 1610 if (ctrlr->is_resetting || ctrlr->is_removed) { 1611 /* 1612 * Controller is already resetting or has been removed. Return 1613 * immediately since there is no need to kick off another 1614 * reset in these cases. 1615 */ 1616 return ctrlr->is_resetting ? -EBUSY : -ENXIO; 1617 } 1618 1619 ctrlr->is_resetting = true; 1620 ctrlr->is_failed = false; 1621 ctrlr->is_disconnecting = true; 1622 ctrlr->prepare_for_reset = true; 1623 1624 NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n"); 1625 1626 /* Disable keep-alive, it'll be re-enabled as part of the init process */ 1627 ctrlr->keep_alive_interval_ticks = 0; 1628 1629 /* Abort all of the queued abort requests */ 1630 nvme_ctrlr_abort_queued_aborts(ctrlr); 1631 1632 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 1633 1634 ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1635 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1636 1637 return 0; 1638 } 1639 1640 static void 1641 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr) 1642 { 1643 assert(ctrlr->is_failed == false); 1644 ctrlr->is_disconnecting = false; 1645 1646 /* Doorbell buffer config is invalid during reset */ 1647 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1648 1649 /* I/O Command Set Specific Identify Controller data is invalidated during reset */ 1650 nvme_ctrlr_free_iocs_specific_data(ctrlr); 1651 1652 spdk_bit_array_free(&ctrlr->free_io_qids); 1653 } 1654 1655 int 1656 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1657 { 1658 int rc; 1659 1660 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1661 rc = nvme_ctrlr_disconnect(ctrlr); 1662 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1663 1664 return rc; 1665 } 1666 1667 void 1668 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) 1669 { 1670 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1671 1672 ctrlr->prepare_for_reset = false; 1673 1674 /* Set the state back to INIT to cause a full hardware reset. */ 1675 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1676 1677 /* Return without releasing ctrlr_lock. ctrlr_lock will be released when 1678 * spdk_nvme_ctrlr_reset_poll_async() returns 0. 1679 */ 1680 } 1681 1682 /** 1683 * This function will be called when the controller is being reinitialized. 1684 * Note: the ctrlr_lock must be held when calling this function. 1685 */ 1686 int 1687 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) 1688 { 1689 struct spdk_nvme_ns *ns, *tmp_ns; 1690 struct spdk_nvme_qpair *qpair; 1691 int rc = 0, rc_tmp = 0; 1692 bool async; 1693 1694 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1695 NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n"); 1696 rc = -1; 1697 } 1698 if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) { 1699 return -EAGAIN; 1700 } 1701 1702 /* 1703 * For non-fabrics controllers, the memory locations of the transport qpair 1704 * don't change when the controller is reset. They simply need to be 1705 * re-enabled with admin commands to the controller. For fabric 1706 * controllers we need to disconnect and reconnect the qpair on its 1707 * own thread outside of the context of the reset. 1708 */ 1709 if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 1710 /* Reinitialize qpairs */ 1711 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1712 assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id)); 1713 spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id); 1714 1715 /* Force a synchronous connect. We can't currently handle an asynchronous 1716 * operation here. */ 1717 async = qpair->async; 1718 qpair->async = false; 1719 rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 1720 qpair->async = async; 1721 1722 if (rc_tmp != 0) { 1723 rc = rc_tmp; 1724 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1725 continue; 1726 } 1727 } 1728 } 1729 1730 /* 1731 * Take this opportunity to remove inactive namespaces. During a reset namespace 1732 * handles can be invalidated. 1733 */ 1734 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 1735 if (!ns->active) { 1736 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 1737 spdk_free(ns); 1738 } 1739 } 1740 1741 if (rc) { 1742 nvme_ctrlr_fail(ctrlr, false); 1743 } 1744 ctrlr->is_resetting = false; 1745 1746 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1747 1748 if (!ctrlr->cdata.oaes.ns_attribute_notices) { 1749 /* 1750 * If controller doesn't support ns_attribute_notices and 1751 * namespace attributes change (e.g. number of namespaces) 1752 * we need to update system handling device reset. 1753 */ 1754 nvme_io_msg_ctrlr_update(ctrlr); 1755 } 1756 1757 return rc; 1758 } 1759 1760 /* 1761 * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset 1762 * (Change CC.EN from 1 to 0) as a operation to disconnect the admin qpair. 1763 * The following two functions are added to do a Controller Level Reset. They have 1764 * to be called under the nvme controller's lock. 1765 */ 1766 void 1767 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) 1768 { 1769 assert(ctrlr->is_disconnecting == true); 1770 1771 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 1772 } 1773 1774 int 1775 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr) 1776 { 1777 int rc = 0; 1778 1779 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1780 NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n"); 1781 rc = -1; 1782 } 1783 1784 if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) { 1785 return -EAGAIN; 1786 } 1787 1788 return rc; 1789 } 1790 1791 static void 1792 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr) 1793 { 1794 struct spdk_nvme_qpair *qpair; 1795 1796 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1797 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1798 } 1799 } 1800 1801 int 1802 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1803 { 1804 int rc; 1805 1806 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1807 1808 rc = nvme_ctrlr_disconnect(ctrlr); 1809 if (rc == 0) { 1810 nvme_ctrlr_fail_io_qpairs(ctrlr); 1811 } 1812 1813 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1814 1815 if (rc != 0) { 1816 if (rc == -EBUSY) { 1817 rc = 0; 1818 } 1819 return rc; 1820 } 1821 1822 while (1) { 1823 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1824 if (rc == -ENXIO) { 1825 break; 1826 } 1827 } 1828 1829 spdk_nvme_ctrlr_reconnect_async(ctrlr); 1830 1831 while (true) { 1832 rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1833 if (rc != -EAGAIN) { 1834 break; 1835 } 1836 } 1837 1838 return rc; 1839 } 1840 1841 int 1842 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1843 { 1844 union spdk_nvme_cap_register cap; 1845 int rc = 0; 1846 1847 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1848 if (cap.bits.nssrs == 0) { 1849 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1850 return -ENOTSUP; 1851 } 1852 1853 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1854 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1855 ctrlr->is_resetting = true; 1856 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1857 ctrlr->is_resetting = false; 1858 1859 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1860 /* 1861 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1862 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 1863 */ 1864 return rc; 1865 } 1866 1867 int 1868 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1869 { 1870 int rc = 0; 1871 1872 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1873 1874 if (ctrlr->is_failed == false) { 1875 rc = -EPERM; 1876 goto out; 1877 } 1878 1879 if (trid->trtype != ctrlr->trid.trtype) { 1880 rc = -EINVAL; 1881 goto out; 1882 } 1883 1884 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1885 rc = -EINVAL; 1886 goto out; 1887 } 1888 1889 ctrlr->trid = *trid; 1890 1891 out: 1892 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1893 return rc; 1894 } 1895 1896 void 1897 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1898 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1899 { 1900 if (!spdk_process_is_primary()) { 1901 return; 1902 } 1903 1904 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1905 ctrlr->remove_cb = remove_cb; 1906 ctrlr->cb_ctx = remove_ctx; 1907 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1908 } 1909 1910 static void 1911 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1912 { 1913 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1914 1915 if (spdk_nvme_cpl_is_error(cpl)) { 1916 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1917 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1918 return; 1919 } 1920 1921 /* 1922 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1923 * controller supports. 1924 */ 1925 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1926 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1927 if (ctrlr->cdata.mdts > 0) { 1928 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1929 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1930 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1931 } 1932 1933 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1934 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1935 ctrlr->cntlid = ctrlr->cdata.cntlid; 1936 } else { 1937 /* 1938 * Fabrics controllers should already have CNTLID from the Connect command. 1939 * 1940 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1941 * trust the one from Connect. 1942 */ 1943 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1944 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1945 ctrlr->cdata.cntlid, ctrlr->cntlid); 1946 } 1947 } 1948 1949 if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1950 assert(ctrlr->cdata.sgls.supported != 0x3); 1951 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1952 if (ctrlr->cdata.sgls.supported == 0x2) { 1953 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1954 } 1955 1956 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1957 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1958 } 1959 1960 if (ctrlr->cdata.sgls.metadata_address && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1961 ctrlr->flags |= SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED; 1962 } 1963 1964 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1965 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1966 } 1967 1968 if (ctrlr->cdata.oacs.directives) { 1969 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 1970 } 1971 1972 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 1973 ctrlr->cdata.fuses.compare_and_write); 1974 if (ctrlr->cdata.fuses.compare_and_write) { 1975 ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 1976 } 1977 1978 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 1979 ctrlr->opts.admin_timeout_ms); 1980 } 1981 1982 static int 1983 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 1984 { 1985 int rc; 1986 1987 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 1988 ctrlr->opts.admin_timeout_ms); 1989 1990 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 1991 &ctrlr->cdata, sizeof(ctrlr->cdata), 1992 nvme_ctrlr_identify_done, ctrlr); 1993 if (rc != 0) { 1994 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1995 return rc; 1996 } 1997 1998 return 0; 1999 } 2000 2001 static void 2002 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2003 { 2004 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2005 struct spdk_nvme_ctrlr *ctrlr = arg; 2006 2007 if (spdk_nvme_cpl_is_error(cpl)) { 2008 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2009 spdk_free(ctrlr->tmp_ptr); 2010 ctrlr->tmp_ptr = NULL; 2011 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2012 return; 2013 } 2014 2015 log_page = ctrlr->tmp_ptr; 2016 2017 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2018 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2019 } 2020 spdk_free(ctrlr->tmp_ptr); 2021 ctrlr->tmp_ptr = NULL; 2022 2023 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2024 } 2025 2026 static int 2027 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2028 { 2029 int rc; 2030 2031 assert(!ctrlr->tmp_ptr); 2032 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2033 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2034 if (!ctrlr->tmp_ptr) { 2035 rc = -ENOMEM; 2036 goto error; 2037 } 2038 2039 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2040 ctrlr->opts.admin_timeout_ms); 2041 2042 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2043 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2044 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2045 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2046 if (rc != 0) { 2047 goto error; 2048 } 2049 2050 return 0; 2051 2052 error: 2053 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2054 spdk_free(ctrlr->tmp_ptr); 2055 ctrlr->tmp_ptr = NULL; 2056 return rc; 2057 } 2058 2059 static void 2060 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl) 2061 { 2062 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2063 2064 if (spdk_nvme_cpl_is_error(cpl)) { 2065 /* no need to print an error, the controller simply does not support ZNS */ 2066 nvme_ctrlr_free_zns_specific_data(ctrlr); 2067 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2068 ctrlr->opts.admin_timeout_ms); 2069 return; 2070 } 2071 2072 /* A zero zasl value means use mdts */ 2073 if (ctrlr->cdata_zns->zasl) { 2074 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl); 2075 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append); 2076 } else { 2077 ctrlr->max_zone_append_size = ctrlr->max_xfer_size; 2078 } 2079 2080 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG, 2081 ctrlr->opts.admin_timeout_ms); 2082 } 2083 2084 /** 2085 * This function will try to fetch the I/O Command Specific Controller data structure for 2086 * each I/O Command Set supported by SPDK. 2087 * 2088 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command" 2089 * will be returned. Since we are fetching in a exploratively way, getting an error back 2090 * from the controller should not be treated as fatal. 2091 * 2092 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set). 2093 * 2094 * I/O Command Sets without a IOCS specific data structure (i.e. a zero-filled IOCS specific 2095 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set). 2096 */ 2097 static int 2098 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2099 { 2100 int rc; 2101 2102 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2103 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2104 ctrlr->opts.admin_timeout_ms); 2105 return 0; 2106 } 2107 2108 /* 2109 * Since SPDK currently only needs to fetch a single Command Set, keep the code here, 2110 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates, 2111 * which would require additional functions and complexity for no good reason. 2112 */ 2113 assert(!ctrlr->cdata_zns); 2114 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2115 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2116 if (!ctrlr->cdata_zns) { 2117 rc = -ENOMEM; 2118 goto error; 2119 } 2120 2121 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2122 ctrlr->opts.admin_timeout_ms); 2123 2124 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2125 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2126 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2127 if (rc != 0) { 2128 goto error; 2129 } 2130 2131 return 0; 2132 2133 error: 2134 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2135 nvme_ctrlr_free_zns_specific_data(ctrlr); 2136 return rc; 2137 } 2138 2139 enum nvme_active_ns_state { 2140 NVME_ACTIVE_NS_STATE_IDLE, 2141 NVME_ACTIVE_NS_STATE_PROCESSING, 2142 NVME_ACTIVE_NS_STATE_DONE, 2143 NVME_ACTIVE_NS_STATE_ERROR 2144 }; 2145 2146 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2147 2148 struct nvme_active_ns_ctx { 2149 struct spdk_nvme_ctrlr *ctrlr; 2150 uint32_t page_count; 2151 uint32_t next_nsid; 2152 uint32_t *new_ns_list; 2153 nvme_active_ns_ctx_deleter deleter; 2154 2155 enum nvme_active_ns_state state; 2156 }; 2157 2158 static struct nvme_active_ns_ctx * 2159 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2160 { 2161 struct nvme_active_ns_ctx *ctx; 2162 uint32_t *new_ns_list = NULL; 2163 2164 ctx = calloc(1, sizeof(*ctx)); 2165 if (!ctx) { 2166 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2167 return NULL; 2168 } 2169 2170 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2171 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2172 if (!new_ns_list) { 2173 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2174 free(ctx); 2175 return NULL; 2176 } 2177 2178 ctx->page_count = 1; 2179 ctx->new_ns_list = new_ns_list; 2180 ctx->ctrlr = ctrlr; 2181 ctx->deleter = deleter; 2182 2183 return ctx; 2184 } 2185 2186 static void 2187 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2188 { 2189 spdk_free(ctx->new_ns_list); 2190 free(ctx); 2191 } 2192 2193 static int 2194 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2195 { 2196 struct spdk_nvme_ns tmp, *ns; 2197 2198 assert(ctrlr != NULL); 2199 2200 tmp.id = nsid; 2201 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2202 if (ns == NULL) { 2203 return -EINVAL; 2204 } 2205 2206 nvme_ns_destruct(ns); 2207 ns->active = false; 2208 2209 return 0; 2210 } 2211 2212 static int 2213 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2214 { 2215 struct spdk_nvme_ns *ns; 2216 2217 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2218 return -EINVAL; 2219 } 2220 2221 /* Namespaces are constructed on demand, so simply request it. */ 2222 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2223 if (ns == NULL) { 2224 return -ENOMEM; 2225 } 2226 2227 ns->active = true; 2228 2229 return 0; 2230 } 2231 2232 static void 2233 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2234 size_t max_entries) 2235 { 2236 uint32_t active_ns_count = 0; 2237 size_t i; 2238 uint32_t nsid; 2239 struct spdk_nvme_ns *ns, *tmp_ns; 2240 int rc; 2241 2242 /* First, remove namespaces that no longer exist */ 2243 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2244 nsid = new_ns_list[0]; 2245 active_ns_count = 0; 2246 while (nsid != 0) { 2247 if (nsid == ns->id) { 2248 break; 2249 } 2250 2251 nsid = new_ns_list[active_ns_count++]; 2252 } 2253 2254 if (nsid != ns->id) { 2255 /* Did not find this namespace id in the new list. */ 2256 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2257 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2258 } 2259 } 2260 2261 /* Next, add new namespaces */ 2262 active_ns_count = 0; 2263 for (i = 0; i < max_entries; i++) { 2264 nsid = new_ns_list[active_ns_count]; 2265 2266 if (nsid == 0) { 2267 break; 2268 } 2269 2270 /* If the namespace already exists, this will not construct it a second time. */ 2271 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2272 if (rc != 0) { 2273 /* We can't easily handle a failure here. But just move on. */ 2274 assert(false); 2275 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2276 continue; 2277 } 2278 2279 active_ns_count++; 2280 } 2281 2282 ctrlr->active_ns_count = active_ns_count; 2283 } 2284 2285 static void 2286 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2287 { 2288 struct nvme_active_ns_ctx *ctx = arg; 2289 uint32_t *new_ns_list = NULL; 2290 2291 if (spdk_nvme_cpl_is_error(cpl)) { 2292 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2293 goto out; 2294 } 2295 2296 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2297 if (ctx->next_nsid == 0) { 2298 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2299 goto out; 2300 } 2301 2302 ctx->page_count++; 2303 new_ns_list = spdk_realloc(ctx->new_ns_list, 2304 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2305 ctx->ctrlr->page_size); 2306 if (!new_ns_list) { 2307 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2308 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2309 goto out; 2310 } 2311 2312 ctx->new_ns_list = new_ns_list; 2313 nvme_ctrlr_identify_active_ns_async(ctx); 2314 return; 2315 2316 out: 2317 if (ctx->deleter) { 2318 ctx->deleter(ctx); 2319 } 2320 } 2321 2322 static void 2323 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2324 { 2325 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2326 uint32_t i; 2327 int rc; 2328 2329 if (ctrlr->cdata.nn == 0) { 2330 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2331 goto out; 2332 } 2333 2334 assert(ctx->new_ns_list != NULL); 2335 2336 /* 2337 * If controller doesn't support active ns list CNS 0x02 dummy up 2338 * an active ns list, i.e. all namespaces report as active 2339 */ 2340 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2341 uint32_t *new_ns_list; 2342 2343 /* 2344 * Active NS list must always end with zero element. 2345 * So, we allocate for cdata.nn+1. 2346 */ 2347 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2348 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2349 new_ns_list = spdk_realloc(ctx->new_ns_list, 2350 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2351 ctx->ctrlr->page_size); 2352 if (!new_ns_list) { 2353 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2354 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2355 goto out; 2356 } 2357 2358 ctx->new_ns_list = new_ns_list; 2359 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2360 for (i = 0; i < ctrlr->cdata.nn; i++) { 2361 ctx->new_ns_list[i] = i + 1; 2362 } 2363 2364 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2365 goto out; 2366 } 2367 2368 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2369 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2370 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2371 nvme_ctrlr_identify_active_ns_async_done, ctx); 2372 if (rc != 0) { 2373 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2374 goto out; 2375 } 2376 2377 return; 2378 2379 out: 2380 if (ctx->deleter) { 2381 ctx->deleter(ctx); 2382 } 2383 } 2384 2385 static void 2386 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2387 { 2388 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2389 struct spdk_nvme_ns *ns; 2390 2391 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2392 nvme_active_ns_ctx_destroy(ctx); 2393 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2394 return; 2395 } 2396 2397 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2398 2399 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2400 nvme_ns_free_iocs_specific_data(ns); 2401 } 2402 2403 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2404 nvme_active_ns_ctx_destroy(ctx); 2405 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2406 } 2407 2408 static void 2409 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2410 { 2411 struct nvme_active_ns_ctx *ctx; 2412 2413 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2414 if (!ctx) { 2415 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2416 return; 2417 } 2418 2419 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2420 ctrlr->opts.admin_timeout_ms); 2421 nvme_ctrlr_identify_active_ns_async(ctx); 2422 } 2423 2424 int 2425 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2426 { 2427 struct nvme_active_ns_ctx *ctx; 2428 int rc; 2429 2430 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2431 if (!ctx) { 2432 return -ENOMEM; 2433 } 2434 2435 nvme_ctrlr_identify_active_ns_async(ctx); 2436 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2437 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2438 if (rc < 0) { 2439 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2440 break; 2441 } 2442 } 2443 2444 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2445 nvme_active_ns_ctx_destroy(ctx); 2446 return -ENXIO; 2447 } 2448 2449 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2450 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2451 nvme_active_ns_ctx_destroy(ctx); 2452 2453 return 0; 2454 } 2455 2456 static void 2457 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2458 { 2459 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2460 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2461 uint32_t nsid; 2462 int rc; 2463 2464 if (spdk_nvme_cpl_is_error(cpl)) { 2465 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2466 return; 2467 } 2468 2469 nvme_ns_set_identify_data(ns); 2470 2471 /* move on to the next active NS */ 2472 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2473 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2474 if (ns == NULL) { 2475 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2476 ctrlr->opts.admin_timeout_ms); 2477 return; 2478 } 2479 ns->ctrlr = ctrlr; 2480 ns->id = nsid; 2481 2482 rc = nvme_ctrlr_identify_ns_async(ns); 2483 if (rc) { 2484 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2485 } 2486 } 2487 2488 static int 2489 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2490 { 2491 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2492 struct spdk_nvme_ns_data *nsdata; 2493 2494 nsdata = &ns->nsdata; 2495 2496 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2497 ctrlr->opts.admin_timeout_ms); 2498 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2499 nsdata, sizeof(*nsdata), 2500 nvme_ctrlr_identify_ns_async_done, ns); 2501 } 2502 2503 static int 2504 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2505 { 2506 uint32_t nsid; 2507 struct spdk_nvme_ns *ns; 2508 int rc; 2509 2510 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2511 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2512 if (ns == NULL) { 2513 /* No active NS, move on to the next state */ 2514 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2515 ctrlr->opts.admin_timeout_ms); 2516 return 0; 2517 } 2518 2519 ns->ctrlr = ctrlr; 2520 ns->id = nsid; 2521 2522 rc = nvme_ctrlr_identify_ns_async(ns); 2523 if (rc) { 2524 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2525 } 2526 2527 return rc; 2528 } 2529 2530 static int 2531 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2532 { 2533 uint32_t nsid; 2534 struct spdk_nvme_ns *ns; 2535 int rc; 2536 2537 if (!prev_nsid) { 2538 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2539 } else { 2540 /* move on to the next active NS */ 2541 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2542 } 2543 2544 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2545 if (ns == NULL) { 2546 /* No first/next active NS, move on to the next state */ 2547 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2548 ctrlr->opts.admin_timeout_ms); 2549 return 0; 2550 } 2551 2552 /* loop until we find a ns which has (supported) iocs specific data */ 2553 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2554 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2555 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2556 if (ns == NULL) { 2557 /* no namespace with (supported) iocs specific data found */ 2558 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2559 ctrlr->opts.admin_timeout_ms); 2560 return 0; 2561 } 2562 } 2563 2564 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2565 if (rc) { 2566 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2567 } 2568 2569 return rc; 2570 } 2571 2572 static void 2573 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2574 { 2575 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2576 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2577 2578 if (spdk_nvme_cpl_is_error(cpl)) { 2579 nvme_ns_free_zns_specific_data(ns); 2580 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2581 return; 2582 } 2583 2584 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2585 } 2586 2587 static int 2588 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2589 { 2590 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2591 int rc; 2592 2593 switch (ns->csi) { 2594 case SPDK_NVME_CSI_ZNS: 2595 break; 2596 default: 2597 /* 2598 * This switch must handle all cases for which 2599 * nvme_ns_has_supported_iocs_specific_data() returns true, 2600 * other cases should never happen. 2601 */ 2602 assert(0); 2603 } 2604 2605 assert(!ns->nsdata_zns); 2606 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2607 SPDK_MALLOC_SHARE); 2608 if (!ns->nsdata_zns) { 2609 return -ENOMEM; 2610 } 2611 2612 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2613 ctrlr->opts.admin_timeout_ms); 2614 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2615 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2616 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2617 if (rc) { 2618 nvme_ns_free_zns_specific_data(ns); 2619 } 2620 2621 return rc; 2622 } 2623 2624 static int 2625 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2626 { 2627 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2628 /* Multi IOCS not supported/enabled, move on to the next state */ 2629 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2630 ctrlr->opts.admin_timeout_ms); 2631 return 0; 2632 } 2633 2634 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2635 } 2636 2637 static void 2638 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2639 { 2640 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2641 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2642 uint32_t nsid; 2643 int rc; 2644 2645 if (spdk_nvme_cpl_is_error(cpl)) { 2646 /* 2647 * Many controllers claim to be compatible with NVMe 1.3, however, 2648 * they do not implement NS ID Desc List. Therefore, instead of setting 2649 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2650 * error and move on to the next state. 2651 * 2652 * The proper way is to create a new quirk for controllers that violate 2653 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2654 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2655 * it is too generic and was added in order to handle controllers that 2656 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 2657 */ 2658 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2659 ctrlr->opts.admin_timeout_ms); 2660 return; 2661 } 2662 2663 nvme_ns_set_id_desc_list_data(ns); 2664 2665 /* move on to the next active NS */ 2666 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2667 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2668 if (ns == NULL) { 2669 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2670 ctrlr->opts.admin_timeout_ms); 2671 return; 2672 } 2673 2674 rc = nvme_ctrlr_identify_id_desc_async(ns); 2675 if (rc) { 2676 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2677 } 2678 } 2679 2680 static int 2681 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2682 { 2683 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2684 2685 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2686 2687 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2688 ctrlr->opts.admin_timeout_ms); 2689 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2690 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2691 nvme_ctrlr_identify_id_desc_async_done, ns); 2692 } 2693 2694 static int 2695 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2696 { 2697 uint32_t nsid; 2698 struct spdk_nvme_ns *ns; 2699 int rc; 2700 2701 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2702 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2703 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2704 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2705 /* NS ID Desc List not supported, move on to the next state */ 2706 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2707 ctrlr->opts.admin_timeout_ms); 2708 return 0; 2709 } 2710 2711 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2712 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2713 if (ns == NULL) { 2714 /* No active NS, move on to the next state */ 2715 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2716 ctrlr->opts.admin_timeout_ms); 2717 return 0; 2718 } 2719 2720 rc = nvme_ctrlr_identify_id_desc_async(ns); 2721 if (rc) { 2722 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2723 } 2724 2725 return rc; 2726 } 2727 2728 static void 2729 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2730 { 2731 if (spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 2732 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2733 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2734 ctrlr->cdata.nvmf_specific.ioccsz); 2735 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2736 assert(0); 2737 } 2738 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2739 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2740 } 2741 } 2742 2743 static void 2744 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2745 { 2746 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2747 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2748 2749 if (spdk_nvme_cpl_is_error(cpl)) { 2750 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2751 ctrlr->opts.num_io_queues = 0; 2752 } else { 2753 /* 2754 * Data in cdw0 is 0-based. 2755 * Lower 16-bits indicate number of submission queues allocated. 2756 * Upper 16-bits indicate number of completion queues allocated. 2757 */ 2758 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2759 cq_allocated = (cpl->cdw0 >> 16) + 1; 2760 2761 /* 2762 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2763 * submission and completion queues. 2764 */ 2765 min_allocated = spdk_min(sq_allocated, cq_allocated); 2766 2767 /* Set number of queues to be minimum of requested and actually allocated. */ 2768 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2769 } 2770 2771 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2772 if (ctrlr->free_io_qids == NULL) { 2773 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2774 return; 2775 } 2776 2777 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2778 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2779 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2780 } 2781 2782 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2783 ctrlr->opts.admin_timeout_ms); 2784 } 2785 2786 static int 2787 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2788 { 2789 int rc; 2790 2791 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2792 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2793 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2794 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2795 } else if (ctrlr->opts.num_io_queues < 1) { 2796 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2797 ctrlr->opts.num_io_queues = 1; 2798 } 2799 2800 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2801 ctrlr->opts.admin_timeout_ms); 2802 2803 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2804 nvme_ctrlr_set_num_queues_done, ctrlr); 2805 if (rc != 0) { 2806 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2807 return rc; 2808 } 2809 2810 return 0; 2811 } 2812 2813 static void 2814 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2815 { 2816 uint32_t keep_alive_interval_us; 2817 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2818 2819 if (spdk_nvme_cpl_is_error(cpl)) { 2820 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2821 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2822 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2823 } else { 2824 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2825 cpl->status.sc, cpl->status.sct); 2826 ctrlr->opts.keep_alive_timeout_ms = 0; 2827 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2828 return; 2829 } 2830 } else { 2831 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2832 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2833 cpl->cdw0); 2834 } 2835 2836 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2837 } 2838 2839 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2840 ctrlr->keep_alive_interval_ticks = 0; 2841 } else { 2842 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2843 2844 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2845 2846 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2847 UINT64_C(1000000); 2848 2849 /* Schedule the first Keep Alive to be sent as soon as possible. */ 2850 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2851 } 2852 2853 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2854 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2855 } else { 2856 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2857 ctrlr->opts.admin_timeout_ms); 2858 } 2859 } 2860 2861 static int 2862 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2863 { 2864 int rc; 2865 2866 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2867 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2868 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2869 } else { 2870 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2871 ctrlr->opts.admin_timeout_ms); 2872 } 2873 return 0; 2874 } 2875 2876 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2877 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2878 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2879 ctrlr->opts.keep_alive_timeout_ms = 0; 2880 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2881 ctrlr->opts.admin_timeout_ms); 2882 return 0; 2883 } 2884 2885 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2886 ctrlr->opts.admin_timeout_ms); 2887 2888 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2889 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2890 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2891 if (rc != 0) { 2892 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2893 ctrlr->opts.keep_alive_timeout_ms = 0; 2894 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2895 return rc; 2896 } 2897 2898 return 0; 2899 } 2900 2901 static void 2902 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2903 { 2904 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2905 2906 if (spdk_nvme_cpl_is_error(cpl)) { 2907 /* 2908 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2909 * is optional. 2910 */ 2911 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2912 cpl->status.sc, cpl->status.sct); 2913 } else { 2914 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2915 } 2916 2917 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2918 } 2919 2920 static int 2921 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2922 { 2923 uint8_t *host_id; 2924 uint32_t host_id_size; 2925 int rc; 2926 2927 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2928 /* 2929 * NVMe-oF sends the host ID during Connect and doesn't allow 2930 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 2931 */ 2932 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2933 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2934 return 0; 2935 } 2936 2937 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2938 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2939 host_id = ctrlr->opts.extended_host_id; 2940 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2941 } else { 2942 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2943 host_id = ctrlr->opts.host_id; 2944 host_id_size = sizeof(ctrlr->opts.host_id); 2945 } 2946 2947 /* If the user specified an all-zeroes host identifier, don't send the command. */ 2948 if (spdk_mem_all_zero(host_id, host_id_size)) { 2949 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2950 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2951 return 0; 2952 } 2953 2954 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2955 2956 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2957 ctrlr->opts.admin_timeout_ms); 2958 2959 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2960 if (rc != 0) { 2961 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2962 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2963 return rc; 2964 } 2965 2966 return 0; 2967 } 2968 2969 void 2970 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2971 { 2972 uint32_t nsid; 2973 struct spdk_nvme_ns *ns; 2974 2975 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2976 nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 2977 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2978 nvme_ns_construct(ns, nsid, ctrlr); 2979 } 2980 } 2981 2982 static int 2983 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 2984 { 2985 struct nvme_completion_poll_status *status; 2986 int rc = -ENOMEM; 2987 char *buffer = NULL; 2988 uint32_t nsid; 2989 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 2990 2991 if (ctrlr->opts.disable_read_changed_ns_list_log_page) { 2992 return 0; 2993 } 2994 2995 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 2996 if (!buffer) { 2997 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 2998 "changed ns log.\n"); 2999 return rc; 3000 } 3001 3002 status = calloc(1, sizeof(*status)); 3003 if (!status) { 3004 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3005 goto free_buffer; 3006 } 3007 3008 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3009 SPDK_NVME_LOG_CHANGED_NS_LIST, 3010 SPDK_NVME_GLOBAL_NS_TAG, 3011 buffer, buf_size, 0, 3012 nvme_completion_poll_cb, status); 3013 3014 if (rc) { 3015 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3016 free(status); 3017 goto free_buffer; 3018 } 3019 3020 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3021 ctrlr->opts.admin_timeout_ms * 1000); 3022 if (!status->timed_out) { 3023 free(status); 3024 } 3025 3026 if (rc) { 3027 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3028 goto free_buffer; 3029 } 3030 3031 /* only check the case of overflow. */ 3032 nsid = from_le32(buffer); 3033 if (nsid == 0xffffffffu) { 3034 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3035 } 3036 3037 free_buffer: 3038 spdk_dma_free(buffer); 3039 return rc; 3040 } 3041 3042 void 3043 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3044 const struct spdk_nvme_cpl *cpl) 3045 { 3046 union spdk_nvme_async_event_completion event; 3047 struct spdk_nvme_ctrlr_process *active_proc; 3048 int rc; 3049 3050 event.raw = cpl->cdw0; 3051 3052 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3053 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3054 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3055 3056 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3057 if (rc) { 3058 return; 3059 } 3060 nvme_ctrlr_update_namespaces(ctrlr); 3061 nvme_io_msg_ctrlr_update(ctrlr); 3062 } 3063 3064 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3065 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3066 if (!ctrlr->opts.disable_read_ana_log_page) { 3067 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3068 if (rc) { 3069 return; 3070 } 3071 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3072 ctrlr); 3073 } 3074 } 3075 3076 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3077 if (active_proc && active_proc->aer_cb_fn) { 3078 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3079 } 3080 } 3081 3082 static void 3083 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3084 const struct spdk_nvme_cpl *cpl) 3085 { 3086 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3087 struct spdk_nvme_ctrlr_process *proc; 3088 3089 /* Add async event to each process objects event list */ 3090 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3091 /* Must be shared memory so other processes can access */ 3092 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3093 if (!nvme_event) { 3094 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3095 return; 3096 } 3097 nvme_event->cpl = *cpl; 3098 3099 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3100 } 3101 } 3102 3103 void 3104 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3105 { 3106 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3107 struct spdk_nvme_ctrlr_process *active_proc; 3108 3109 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3110 3111 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3112 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3113 spdk_nvme_ctrlr_aer_completion_list, link); 3114 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3115 spdk_free(nvme_event); 3116 3117 } 3118 } 3119 3120 static void 3121 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3122 { 3123 struct nvme_async_event_request *aer = arg; 3124 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3125 3126 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3127 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3128 /* 3129 * This is simulated when controller is being shut down, to 3130 * effectively abort outstanding asynchronous event requests 3131 * and make sure all memory is freed. Do not repost the 3132 * request in this case. 3133 */ 3134 return; 3135 } 3136 3137 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3138 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3139 /* 3140 * SPDK will only send as many AERs as the device says it supports, 3141 * so this status code indicates an out-of-spec device. Do not repost 3142 * the request in this case. 3143 */ 3144 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3145 "handling. Do not repost this AER.\n"); 3146 return; 3147 } 3148 3149 /* Add the events to the list */ 3150 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3151 3152 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3153 if (ctrlr->is_removed || ctrlr->is_destructed) { 3154 return; 3155 } 3156 3157 /* 3158 * Repost another asynchronous event request to replace the one 3159 * that just completed. 3160 */ 3161 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3162 /* 3163 * We can't do anything to recover from a failure here, 3164 * so just print a warning message and leave the AER unsubmitted. 3165 */ 3166 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3167 } 3168 } 3169 3170 static int 3171 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3172 struct nvme_async_event_request *aer) 3173 { 3174 struct nvme_request *req; 3175 3176 aer->ctrlr = ctrlr; 3177 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3178 aer->req = req; 3179 if (req == NULL) { 3180 return -1; 3181 } 3182 3183 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3184 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3185 } 3186 3187 static void 3188 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3189 { 3190 struct nvme_async_event_request *aer; 3191 int rc; 3192 uint32_t i; 3193 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3194 3195 if (spdk_nvme_cpl_is_error(cpl)) { 3196 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3197 ctrlr->num_aers = 0; 3198 } else { 3199 /* aerl is a zero-based value, so we need to add 1 here. */ 3200 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3201 } 3202 3203 for (i = 0; i < ctrlr->num_aers; i++) { 3204 aer = &ctrlr->aer[i]; 3205 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3206 if (rc) { 3207 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3208 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3209 return; 3210 } 3211 } 3212 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3213 } 3214 3215 static int 3216 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3217 { 3218 union spdk_nvme_feat_async_event_configuration config; 3219 int rc; 3220 3221 config.raw = 0; 3222 3223 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3224 config.bits.discovery_log_change_notice = 1; 3225 } else { 3226 config.bits.crit_warn.bits.available_spare = 1; 3227 config.bits.crit_warn.bits.temperature = 1; 3228 config.bits.crit_warn.bits.device_reliability = 1; 3229 config.bits.crit_warn.bits.read_only = 1; 3230 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3231 3232 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3233 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3234 config.bits.ns_attr_notice = 1; 3235 } 3236 if (ctrlr->cdata.oaes.fw_activation_notices) { 3237 config.bits.fw_activation_notice = 1; 3238 } 3239 if (ctrlr->cdata.oaes.ana_change_notices) { 3240 config.bits.ana_change_notice = 1; 3241 } 3242 } 3243 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3244 config.bits.telemetry_log_notice = 1; 3245 } 3246 } 3247 3248 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3249 ctrlr->opts.admin_timeout_ms); 3250 3251 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3252 nvme_ctrlr_configure_aer_done, 3253 ctrlr); 3254 if (rc != 0) { 3255 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3256 return rc; 3257 } 3258 3259 return 0; 3260 } 3261 3262 struct spdk_nvme_ctrlr_process * 3263 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3264 { 3265 struct spdk_nvme_ctrlr_process *active_proc; 3266 3267 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3268 if (active_proc->pid == pid) { 3269 return active_proc; 3270 } 3271 } 3272 3273 return NULL; 3274 } 3275 3276 struct spdk_nvme_ctrlr_process * 3277 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3278 { 3279 return nvme_ctrlr_get_process(ctrlr, getpid()); 3280 } 3281 3282 /** 3283 * This function will be called when a process is using the controller. 3284 * 1. For the primary process, it is called when constructing the controller. 3285 * 2. For the secondary process, it is called at probing the controller. 3286 * Note: will check whether the process is already added for the same process. 3287 */ 3288 int 3289 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3290 { 3291 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3292 pid_t pid = getpid(); 3293 3294 /* Check whether the process is already added or not */ 3295 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3296 return 0; 3297 } 3298 3299 /* Initialize the per process properties for this ctrlr */ 3300 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3301 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3302 if (ctrlr_proc == NULL) { 3303 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3304 3305 return -1; 3306 } 3307 3308 ctrlr_proc->is_primary = spdk_process_is_primary(); 3309 ctrlr_proc->pid = pid; 3310 STAILQ_INIT(&ctrlr_proc->active_reqs); 3311 ctrlr_proc->devhandle = devhandle; 3312 ctrlr_proc->ref = 0; 3313 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3314 STAILQ_INIT(&ctrlr_proc->async_events); 3315 3316 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3317 3318 return 0; 3319 } 3320 3321 /** 3322 * This function will be called when the process detaches the controller. 3323 * Note: the ctrlr_lock must be held when calling this function. 3324 */ 3325 static void 3326 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3327 struct spdk_nvme_ctrlr_process *proc) 3328 { 3329 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3330 3331 assert(STAILQ_EMPTY(&proc->active_reqs)); 3332 3333 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3334 spdk_nvme_ctrlr_free_io_qpair(qpair); 3335 } 3336 3337 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3338 3339 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3340 spdk_pci_device_detach(proc->devhandle); 3341 } 3342 3343 spdk_free(proc); 3344 } 3345 3346 /** 3347 * This function will be called when the process exited unexpectedly 3348 * in order to free any incomplete nvme request, allocated IO qpairs 3349 * and allocated memory. 3350 * Note: the ctrlr_lock must be held when calling this function. 3351 */ 3352 static void 3353 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3354 { 3355 struct nvme_request *req, *tmp_req; 3356 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3357 struct spdk_nvme_ctrlr_aer_completion_list *event; 3358 3359 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3360 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3361 3362 assert(req->pid == proc->pid); 3363 if (req->user_buffer && req->payload_size) { 3364 spdk_free(req->payload.contig_or_cb_arg); 3365 } 3366 nvme_free_request(req); 3367 } 3368 3369 /* Remove async event from each process objects event list */ 3370 while (!STAILQ_EMPTY(&proc->async_events)) { 3371 event = STAILQ_FIRST(&proc->async_events); 3372 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3373 spdk_free(event); 3374 } 3375 3376 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3377 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3378 3379 /* 3380 * The process may have been killed while some qpairs were in their 3381 * completion context. Clear that flag here to allow these IO 3382 * qpairs to be deleted. 3383 */ 3384 qpair->in_completion_context = 0; 3385 3386 qpair->no_deletion_notification_needed = 1; 3387 3388 spdk_nvme_ctrlr_free_io_qpair(qpair); 3389 } 3390 3391 spdk_free(proc); 3392 } 3393 3394 /** 3395 * This function will be called when destructing the controller. 3396 * 1. There is no more admin request on this controller. 3397 * 2. Clean up any left resource allocation when its associated process is gone. 3398 */ 3399 void 3400 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3401 { 3402 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3403 3404 /* Free all the processes' properties and make sure no pending admin IOs */ 3405 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3406 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3407 3408 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3409 3410 spdk_free(active_proc); 3411 } 3412 } 3413 3414 /** 3415 * This function will be called when any other process attaches or 3416 * detaches the controller in order to cleanup those unexpectedly 3417 * terminated processes. 3418 * Note: the ctrlr_lock must be held when calling this function. 3419 */ 3420 static int 3421 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3422 { 3423 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3424 int active_proc_count = 0; 3425 3426 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3427 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3428 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3429 3430 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3431 3432 nvme_ctrlr_cleanup_process(active_proc); 3433 } else { 3434 active_proc_count++; 3435 } 3436 } 3437 3438 return active_proc_count; 3439 } 3440 3441 void 3442 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3443 { 3444 struct spdk_nvme_ctrlr_process *active_proc; 3445 3446 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3447 3448 nvme_ctrlr_remove_inactive_proc(ctrlr); 3449 3450 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3451 if (active_proc) { 3452 active_proc->ref++; 3453 } 3454 3455 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3456 } 3457 3458 void 3459 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3460 { 3461 struct spdk_nvme_ctrlr_process *active_proc; 3462 int proc_count; 3463 3464 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3465 3466 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3467 3468 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3469 if (active_proc) { 3470 active_proc->ref--; 3471 assert(active_proc->ref >= 0); 3472 3473 /* 3474 * The last active process will be removed at the end of 3475 * the destruction of the controller. 3476 */ 3477 if (active_proc->ref == 0 && proc_count != 1) { 3478 nvme_ctrlr_remove_process(ctrlr, active_proc); 3479 } 3480 } 3481 3482 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3483 } 3484 3485 int 3486 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3487 { 3488 struct spdk_nvme_ctrlr_process *active_proc; 3489 int ref = 0; 3490 3491 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3492 3493 nvme_ctrlr_remove_inactive_proc(ctrlr); 3494 3495 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3496 ref += active_proc->ref; 3497 } 3498 3499 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3500 3501 return ref; 3502 } 3503 3504 /** 3505 * Get the PCI device handle which is only visible to its associated process. 3506 */ 3507 struct spdk_pci_device * 3508 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3509 { 3510 struct spdk_nvme_ctrlr_process *active_proc; 3511 struct spdk_pci_device *devhandle = NULL; 3512 3513 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3514 3515 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3516 if (active_proc) { 3517 devhandle = active_proc->devhandle; 3518 } 3519 3520 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3521 3522 return devhandle; 3523 } 3524 3525 static void 3526 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3527 { 3528 struct spdk_nvme_ctrlr *ctrlr = ctx; 3529 3530 if (spdk_nvme_cpl_is_error(cpl)) { 3531 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3532 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3533 return; 3534 } 3535 3536 assert(value <= UINT32_MAX); 3537 ctrlr->vs.raw = (uint32_t)value; 3538 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3539 } 3540 3541 static void 3542 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3543 { 3544 struct spdk_nvme_ctrlr *ctrlr = ctx; 3545 3546 if (spdk_nvme_cpl_is_error(cpl)) { 3547 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3548 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3549 return; 3550 } 3551 3552 ctrlr->cap.raw = value; 3553 nvme_ctrlr_init_cap(ctrlr); 3554 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3555 } 3556 3557 static void 3558 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3559 { 3560 struct spdk_nvme_ctrlr *ctrlr = ctx; 3561 enum nvme_ctrlr_state state; 3562 3563 if (spdk_nvme_cpl_is_error(cpl)) { 3564 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3565 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3566 return; 3567 } 3568 3569 assert(value <= UINT32_MAX); 3570 ctrlr->process_init_cc.raw = (uint32_t)value; 3571 3572 if (ctrlr->process_init_cc.bits.en) { 3573 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3574 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3575 } else { 3576 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3577 } 3578 3579 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3580 } 3581 3582 static void 3583 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3584 { 3585 struct spdk_nvme_ctrlr *ctrlr = ctx; 3586 3587 if (spdk_nvme_cpl_is_error(cpl)) { 3588 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3589 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3590 return; 3591 } 3592 3593 /* 3594 * Wait 2.5 seconds before accessing PCI registers. 3595 * Not using sleep() to avoid blocking other controller's initialization. 3596 */ 3597 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3598 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3599 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3600 } 3601 3602 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3603 nvme_ctrlr_get_ready_timeout(ctrlr)); 3604 } 3605 3606 static void 3607 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3608 { 3609 struct spdk_nvme_ctrlr *ctrlr = ctx; 3610 union spdk_nvme_cc_register cc; 3611 int rc; 3612 3613 if (spdk_nvme_cpl_is_error(cpl)) { 3614 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3615 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3616 return; 3617 } 3618 3619 assert(value <= UINT32_MAX); 3620 cc.raw = (uint32_t)value; 3621 cc.bits.en = 0; 3622 ctrlr->process_init_cc.raw = cc.raw; 3623 3624 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3625 nvme_ctrlr_get_ready_timeout(ctrlr)); 3626 3627 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3628 if (rc != 0) { 3629 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3630 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3631 } 3632 } 3633 3634 static void 3635 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3636 { 3637 struct spdk_nvme_ctrlr *ctrlr = ctx; 3638 union spdk_nvme_csts_register csts; 3639 3640 if (spdk_nvme_cpl_is_error(cpl)) { 3641 /* While a device is resetting, it may be unable to service MMIO reads 3642 * temporarily. Allow for this case. 3643 */ 3644 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3645 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3646 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3647 NVME_TIMEOUT_KEEP_EXISTING); 3648 } else { 3649 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3650 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3651 } 3652 3653 return; 3654 } 3655 3656 assert(value <= UINT32_MAX); 3657 csts.raw = (uint32_t)value; 3658 if (csts.bits.rdy == 1 || csts.bits.cfs == 1) { 3659 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3660 nvme_ctrlr_get_ready_timeout(ctrlr)); 3661 } else { 3662 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3663 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3664 NVME_TIMEOUT_KEEP_EXISTING); 3665 } 3666 } 3667 3668 static void 3669 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3670 { 3671 struct spdk_nvme_ctrlr *ctrlr = ctx; 3672 union spdk_nvme_csts_register csts; 3673 3674 if (spdk_nvme_cpl_is_error(cpl)) { 3675 /* While a device is resetting, it may be unable to service MMIO reads 3676 * temporarily. Allow for this case. 3677 */ 3678 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3679 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3680 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3681 NVME_TIMEOUT_KEEP_EXISTING); 3682 } else { 3683 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3684 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3685 } 3686 3687 return; 3688 } 3689 3690 assert(value <= UINT32_MAX); 3691 csts.raw = (uint32_t)value; 3692 if (csts.bits.rdy == 0) { 3693 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3694 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED, 3695 nvme_ctrlr_get_ready_timeout(ctrlr)); 3696 } else { 3697 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3698 NVME_TIMEOUT_KEEP_EXISTING); 3699 } 3700 } 3701 3702 static void 3703 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3704 const struct spdk_nvme_cpl *cpl) 3705 { 3706 struct spdk_nvme_ctrlr *ctrlr = ctx; 3707 union spdk_nvme_csts_register csts; 3708 3709 if (spdk_nvme_cpl_is_error(cpl)) { 3710 /* While a device is resetting, it may be unable to service MMIO reads 3711 * temporarily. Allow for this case. 3712 */ 3713 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3714 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3715 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3716 NVME_TIMEOUT_KEEP_EXISTING); 3717 } else { 3718 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3719 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3720 } 3721 3722 return; 3723 } 3724 3725 assert(value <= UINT32_MAX); 3726 csts.raw = value; 3727 if (csts.bits.rdy == 1) { 3728 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3729 /* 3730 * The controller has been enabled. 3731 * Perform the rest of initialization serially. 3732 */ 3733 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3734 ctrlr->opts.admin_timeout_ms); 3735 } else { 3736 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3737 NVME_TIMEOUT_KEEP_EXISTING); 3738 } 3739 } 3740 3741 /** 3742 * This function will be called repeatedly during initialization until the controller is ready. 3743 */ 3744 int 3745 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3746 { 3747 uint32_t ready_timeout_in_ms; 3748 uint64_t ticks; 3749 int rc = 0; 3750 3751 ticks = spdk_get_ticks(); 3752 3753 /* 3754 * May need to avoid accessing any register on the target controller 3755 * for a while. Return early without touching the FSM. 3756 * Check sleep_timeout_tsc > 0 for unit test. 3757 */ 3758 if ((ctrlr->sleep_timeout_tsc > 0) && 3759 (ticks <= ctrlr->sleep_timeout_tsc)) { 3760 return 0; 3761 } 3762 ctrlr->sleep_timeout_tsc = 0; 3763 3764 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3765 3766 /* 3767 * Check if the current initialization step is done or has timed out. 3768 */ 3769 switch (ctrlr->state) { 3770 case NVME_CTRLR_STATE_INIT_DELAY: 3771 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3772 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3773 /* 3774 * Controller may need some delay before it's enabled. 3775 * 3776 * This is a workaround for an issue where the PCIe-attached NVMe controller 3777 * is not ready after VFIO reset. We delay the initialization rather than the 3778 * enabling itself, because this is required only for the very first enabling 3779 * - directly after a VFIO reset. 3780 */ 3781 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3782 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3783 } 3784 break; 3785 3786 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3787 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3788 if (rc == 0) { 3789 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3790 NVME_TIMEOUT_INFINITE); 3791 } else { 3792 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3793 } 3794 break; 3795 3796 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3797 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3798 3799 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3800 case NVME_QPAIR_CONNECTING: 3801 break; 3802 case NVME_QPAIR_CONNECTED: 3803 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3804 /* Fall through */ 3805 case NVME_QPAIR_ENABLED: 3806 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3807 NVME_TIMEOUT_INFINITE); 3808 /* Abort any queued requests that were sent while the adminq was connecting 3809 * to avoid stalling the init process during a reset, as requests don't get 3810 * resubmitted while the controller is resetting and subsequent commands 3811 * would get queued too. 3812 */ 3813 nvme_qpair_abort_queued_reqs(ctrlr->adminq); 3814 break; 3815 case NVME_QPAIR_DISCONNECTING: 3816 assert(ctrlr->adminq->async == true); 3817 break; 3818 case NVME_QPAIR_DISCONNECTED: 3819 /* fallthrough */ 3820 default: 3821 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3822 break; 3823 } 3824 3825 break; 3826 3827 case NVME_CTRLR_STATE_READ_VS: 3828 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3829 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3830 break; 3831 3832 case NVME_CTRLR_STATE_READ_CAP: 3833 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3834 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3835 break; 3836 3837 case NVME_CTRLR_STATE_CHECK_EN: 3838 /* Begin the hardware initialization by making sure the controller is disabled. */ 3839 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3840 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3841 break; 3842 3843 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3844 /* 3845 * Controller is currently enabled. We need to disable it to cause a reset. 3846 * 3847 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3848 * Wait for the ready bit to be 1 before disabling the controller. 3849 */ 3850 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3851 NVME_TIMEOUT_KEEP_EXISTING); 3852 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3853 break; 3854 3855 case NVME_CTRLR_STATE_SET_EN_0: 3856 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3857 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3858 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3859 break; 3860 3861 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3862 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3863 NVME_TIMEOUT_KEEP_EXISTING); 3864 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3865 break; 3866 3867 case NVME_CTRLR_STATE_DISABLED: 3868 if (ctrlr->is_disconnecting) { 3869 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n"); 3870 } else { 3871 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3872 3873 /* 3874 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3875 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3876 */ 3877 spdk_delay_us(100); 3878 } 3879 break; 3880 3881 case NVME_CTRLR_STATE_ENABLE: 3882 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3883 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3884 rc = nvme_ctrlr_enable(ctrlr); 3885 if (rc) { 3886 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d", rc); 3887 } 3888 return rc; 3889 3890 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3891 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3892 NVME_TIMEOUT_KEEP_EXISTING); 3893 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3894 ctrlr); 3895 break; 3896 3897 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3898 nvme_transport_qpair_reset(ctrlr->adminq); 3899 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3900 break; 3901 3902 case NVME_CTRLR_STATE_IDENTIFY: 3903 rc = nvme_ctrlr_identify(ctrlr); 3904 break; 3905 3906 case NVME_CTRLR_STATE_CONFIGURE_AER: 3907 rc = nvme_ctrlr_configure_aer(ctrlr); 3908 break; 3909 3910 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3911 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3912 break; 3913 3914 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3915 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3916 break; 3917 3918 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3919 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3920 break; 3921 3922 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3923 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3924 rc = nvme_ctrlr_set_num_queues(ctrlr); 3925 break; 3926 3927 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3928 _nvme_ctrlr_identify_active_ns(ctrlr); 3929 break; 3930 3931 case NVME_CTRLR_STATE_IDENTIFY_NS: 3932 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3933 break; 3934 3935 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3936 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3937 break; 3938 3939 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3940 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3941 break; 3942 3943 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3944 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3945 break; 3946 3947 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3948 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3949 break; 3950 3951 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3952 nvme_ctrlr_set_supported_features(ctrlr); 3953 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3954 ctrlr->opts.admin_timeout_ms); 3955 break; 3956 3957 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3958 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3959 break; 3960 3961 case NVME_CTRLR_STATE_SET_HOST_ID: 3962 rc = nvme_ctrlr_set_host_id(ctrlr); 3963 break; 3964 3965 case NVME_CTRLR_STATE_TRANSPORT_READY: 3966 rc = nvme_transport_ctrlr_ready(ctrlr); 3967 if (rc) { 3968 NVME_CTRLR_ERRLOG(ctrlr, "Transport controller ready step failed: rc %d\n", rc); 3969 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3970 } else { 3971 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 3972 } 3973 break; 3974 3975 case NVME_CTRLR_STATE_READY: 3976 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3977 return 0; 3978 3979 case NVME_CTRLR_STATE_ERROR: 3980 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3981 return -1; 3982 3983 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 3984 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 3985 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 3986 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 3987 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3988 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 3989 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 3990 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3991 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 3992 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 3993 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 3994 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 3995 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 3996 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 3997 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 3998 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 3999 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 4000 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 4001 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 4002 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 4003 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 4004 /* 4005 * nvme_ctrlr_process_init() may be called from the completion context 4006 * for the admin qpair. Avoid recursive calls for this case. 4007 */ 4008 if (!ctrlr->adminq->in_completion_context) { 4009 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4010 } 4011 break; 4012 4013 default: 4014 assert(0); 4015 return -1; 4016 } 4017 4018 if (rc) { 4019 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d (%s)\n", 4020 rc, ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4021 } 4022 4023 /* Note: we use the ticks captured when we entered this function. 4024 * This covers environments where the SPDK process gets swapped out after 4025 * we tried to advance the state but before we check the timeout here. 4026 * It is not normal for this to happen, but harmless to handle it in this 4027 * way. 4028 */ 4029 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4030 ticks > ctrlr->state_timeout_tsc) { 4031 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4032 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4033 return -1; 4034 } 4035 4036 return rc; 4037 } 4038 4039 int 4040 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4041 { 4042 pthread_mutexattr_t attr; 4043 int rc = 0; 4044 4045 if (pthread_mutexattr_init(&attr)) { 4046 return -1; 4047 } 4048 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4049 #ifndef __FreeBSD__ 4050 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4051 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4052 #endif 4053 pthread_mutex_init(mtx, &attr)) { 4054 rc = -1; 4055 } 4056 pthread_mutexattr_destroy(&attr); 4057 return rc; 4058 } 4059 4060 int 4061 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4062 { 4063 int rc; 4064 4065 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4066 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4067 } else { 4068 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4069 } 4070 4071 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4072 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4073 ctrlr->opts.admin_queue_size); 4074 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4075 } 4076 4077 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_ADMIN_QUEUE_SIZE && 4078 (ctrlr->opts.admin_queue_size % SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE) != 0) { 4079 NVME_CTRLR_ERRLOG(ctrlr, 4080 "admin_queue_size %u is invalid for this NVMe device, adjust to next multiple\n", 4081 ctrlr->opts.admin_queue_size); 4082 ctrlr->opts.admin_queue_size = SPDK_ALIGN_CEIL(ctrlr->opts.admin_queue_size, 4083 SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE); 4084 } 4085 4086 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4087 NVME_CTRLR_ERRLOG(ctrlr, 4088 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4089 ctrlr->opts.admin_queue_size); 4090 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4091 } 4092 4093 ctrlr->flags = 0; 4094 ctrlr->free_io_qids = NULL; 4095 ctrlr->is_resetting = false; 4096 ctrlr->is_failed = false; 4097 ctrlr->is_destructed = false; 4098 4099 TAILQ_INIT(&ctrlr->active_io_qpairs); 4100 STAILQ_INIT(&ctrlr->queued_aborts); 4101 ctrlr->outstanding_aborts = 0; 4102 4103 ctrlr->ana_log_page = NULL; 4104 ctrlr->ana_log_page_size = 0; 4105 4106 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4107 if (rc != 0) { 4108 return rc; 4109 } 4110 4111 TAILQ_INIT(&ctrlr->active_procs); 4112 STAILQ_INIT(&ctrlr->register_operations); 4113 4114 RB_INIT(&ctrlr->ns); 4115 4116 return rc; 4117 } 4118 4119 static void 4120 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4121 { 4122 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4123 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4124 } 4125 4126 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4127 4128 /* For now, always select page_size == min_page_size. */ 4129 ctrlr->page_size = ctrlr->min_page_size; 4130 4131 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4132 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4133 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4134 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4135 /* If the user specifically set an IO queue size different than the 4136 * default, use that value. Otherwise overwrite with the quirked value. 4137 * This allows this quirk to be overridden when necessary. 4138 * However, cap.mqes still needs to be respected. 4139 */ 4140 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4141 } 4142 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4143 4144 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4145 } 4146 4147 void 4148 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4149 { 4150 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4151 } 4152 4153 void 4154 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4155 struct nvme_ctrlr_detach_ctx *ctx) 4156 { 4157 struct spdk_nvme_qpair *qpair, *tmp; 4158 4159 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4160 4161 ctrlr->prepare_for_reset = false; 4162 ctrlr->is_destructed = true; 4163 4164 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4165 4166 nvme_ctrlr_abort_queued_aborts(ctrlr); 4167 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4168 4169 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4170 spdk_nvme_ctrlr_free_io_qpair(qpair); 4171 } 4172 4173 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4174 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4175 4176 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4177 } 4178 4179 int 4180 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4181 struct nvme_ctrlr_detach_ctx *ctx) 4182 { 4183 struct spdk_nvme_ns *ns, *tmp_ns; 4184 int rc = 0; 4185 4186 if (!ctx->shutdown_complete) { 4187 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4188 if (rc == -EAGAIN) { 4189 return -EAGAIN; 4190 } 4191 /* Destruct ctrlr forcefully for any other error. */ 4192 } 4193 4194 if (ctx->cb_fn) { 4195 ctx->cb_fn(ctrlr); 4196 } 4197 4198 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4199 4200 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4201 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4202 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4203 spdk_free(ns); 4204 } 4205 4206 ctrlr->active_ns_count = 0; 4207 4208 spdk_bit_array_free(&ctrlr->free_io_qids); 4209 4210 free(ctrlr->ana_log_page); 4211 free(ctrlr->copied_ana_desc); 4212 ctrlr->ana_log_page = NULL; 4213 ctrlr->copied_ana_desc = NULL; 4214 ctrlr->ana_log_page_size = 0; 4215 4216 nvme_transport_ctrlr_destruct(ctrlr); 4217 4218 return rc; 4219 } 4220 4221 void 4222 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4223 { 4224 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4225 int rc; 4226 4227 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4228 4229 while (1) { 4230 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4231 if (rc != -EAGAIN) { 4232 break; 4233 } 4234 nvme_delay(1000); 4235 } 4236 } 4237 4238 int 4239 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4240 struct nvme_request *req) 4241 { 4242 return nvme_qpair_submit_request(ctrlr->adminq, req); 4243 } 4244 4245 static void 4246 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4247 { 4248 /* Do nothing */ 4249 } 4250 4251 /* 4252 * Check if we need to send a Keep Alive command. 4253 * Caller must hold ctrlr->ctrlr_lock. 4254 */ 4255 static int 4256 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4257 { 4258 uint64_t now; 4259 struct nvme_request *req; 4260 struct spdk_nvme_cmd *cmd; 4261 int rc = 0; 4262 4263 now = spdk_get_ticks(); 4264 if (now < ctrlr->next_keep_alive_tick) { 4265 return rc; 4266 } 4267 4268 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4269 if (req == NULL) { 4270 return rc; 4271 } 4272 4273 cmd = &req->cmd; 4274 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4275 4276 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4277 if (rc != 0) { 4278 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4279 rc = -ENXIO; 4280 } 4281 4282 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4283 return rc; 4284 } 4285 4286 int32_t 4287 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4288 { 4289 int32_t num_completions; 4290 int32_t rc; 4291 struct spdk_nvme_ctrlr_process *active_proc; 4292 4293 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4294 4295 if (ctrlr->keep_alive_interval_ticks) { 4296 rc = nvme_ctrlr_keep_alive(ctrlr); 4297 if (rc) { 4298 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4299 return rc; 4300 } 4301 } 4302 4303 rc = nvme_io_msg_process(ctrlr); 4304 if (rc < 0) { 4305 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4306 return rc; 4307 } 4308 num_completions = rc; 4309 4310 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4311 4312 /* Each process has an async list, complete the ones for this process object */ 4313 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4314 if (active_proc) { 4315 nvme_ctrlr_complete_queued_async_events(ctrlr); 4316 } 4317 4318 if (rc == -ENXIO && ctrlr->is_disconnecting) { 4319 nvme_ctrlr_disconnect_done(ctrlr); 4320 } 4321 4322 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4323 4324 if (rc < 0) { 4325 num_completions = rc; 4326 } else { 4327 num_completions += rc; 4328 } 4329 4330 return num_completions; 4331 } 4332 4333 const struct spdk_nvme_ctrlr_data * 4334 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4335 { 4336 return &ctrlr->cdata; 4337 } 4338 4339 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4340 { 4341 union spdk_nvme_csts_register csts; 4342 4343 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4344 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4345 } 4346 return csts; 4347 } 4348 4349 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4350 { 4351 union spdk_nvme_cc_register cc; 4352 4353 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4354 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4355 } 4356 return cc; 4357 } 4358 4359 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4360 { 4361 return ctrlr->cap; 4362 } 4363 4364 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4365 { 4366 return ctrlr->vs; 4367 } 4368 4369 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4370 { 4371 union spdk_nvme_cmbsz_register cmbsz; 4372 4373 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4374 cmbsz.raw = 0; 4375 } 4376 4377 return cmbsz; 4378 } 4379 4380 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4381 { 4382 union spdk_nvme_pmrcap_register pmrcap; 4383 4384 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4385 pmrcap.raw = 0; 4386 } 4387 4388 return pmrcap; 4389 } 4390 4391 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4392 { 4393 union spdk_nvme_bpinfo_register bpinfo; 4394 4395 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4396 bpinfo.raw = 0; 4397 } 4398 4399 return bpinfo; 4400 } 4401 4402 uint64_t 4403 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4404 { 4405 return ctrlr->pmr_size; 4406 } 4407 4408 uint32_t 4409 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4410 { 4411 return ctrlr->cdata.nn; 4412 } 4413 4414 bool 4415 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4416 { 4417 struct spdk_nvme_ns tmp, *ns; 4418 4419 tmp.id = nsid; 4420 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4421 4422 if (ns != NULL) { 4423 return ns->active; 4424 } 4425 4426 return false; 4427 } 4428 4429 uint32_t 4430 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4431 { 4432 struct spdk_nvme_ns *ns; 4433 4434 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4435 if (ns == NULL) { 4436 return 0; 4437 } 4438 4439 while (ns != NULL) { 4440 if (ns->active) { 4441 return ns->id; 4442 } 4443 4444 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4445 } 4446 4447 return 0; 4448 } 4449 4450 uint32_t 4451 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4452 { 4453 struct spdk_nvme_ns tmp, *ns; 4454 4455 tmp.id = prev_nsid; 4456 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4457 if (ns == NULL) { 4458 return 0; 4459 } 4460 4461 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4462 while (ns != NULL) { 4463 if (ns->active) { 4464 return ns->id; 4465 } 4466 4467 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4468 } 4469 4470 return 0; 4471 } 4472 4473 struct spdk_nvme_ns * 4474 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4475 { 4476 struct spdk_nvme_ns tmp; 4477 struct spdk_nvme_ns *ns; 4478 4479 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4480 return NULL; 4481 } 4482 4483 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4484 4485 tmp.id = nsid; 4486 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4487 4488 if (ns == NULL) { 4489 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4490 if (ns == NULL) { 4491 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4492 return NULL; 4493 } 4494 4495 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4496 ns->id = nsid; 4497 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4498 } 4499 4500 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4501 4502 return ns; 4503 } 4504 4505 struct spdk_pci_device * 4506 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4507 { 4508 if (ctrlr == NULL) { 4509 return NULL; 4510 } 4511 4512 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4513 return NULL; 4514 } 4515 4516 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4517 } 4518 4519 uint32_t 4520 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4521 { 4522 return ctrlr->max_xfer_size; 4523 } 4524 4525 void 4526 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4527 spdk_nvme_aer_cb aer_cb_fn, 4528 void *aer_cb_arg) 4529 { 4530 struct spdk_nvme_ctrlr_process *active_proc; 4531 4532 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4533 4534 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4535 if (active_proc) { 4536 active_proc->aer_cb_fn = aer_cb_fn; 4537 active_proc->aer_cb_arg = aer_cb_arg; 4538 } 4539 4540 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4541 } 4542 4543 void 4544 spdk_nvme_ctrlr_disable_read_changed_ns_list_log_page(struct spdk_nvme_ctrlr *ctrlr) 4545 { 4546 ctrlr->opts.disable_read_changed_ns_list_log_page = true; 4547 } 4548 4549 void 4550 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4551 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4552 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4553 { 4554 struct spdk_nvme_ctrlr_process *active_proc; 4555 4556 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4557 4558 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4559 if (active_proc) { 4560 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4561 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4562 active_proc->timeout_cb_fn = cb_fn; 4563 active_proc->timeout_cb_arg = cb_arg; 4564 } 4565 4566 ctrlr->timeout_enabled = true; 4567 4568 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4569 } 4570 4571 bool 4572 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4573 { 4574 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ 4575 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4576 return ctrlr->log_page_supported[log_page]; 4577 } 4578 4579 bool 4580 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4581 { 4582 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4583 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4584 return ctrlr->feature_supported[feature_code]; 4585 } 4586 4587 int 4588 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4589 struct spdk_nvme_ctrlr_list *payload) 4590 { 4591 struct nvme_completion_poll_status *status; 4592 struct spdk_nvme_ns *ns; 4593 int res; 4594 4595 if (nsid == 0) { 4596 return -EINVAL; 4597 } 4598 4599 status = calloc(1, sizeof(*status)); 4600 if (!status) { 4601 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4602 return -ENOMEM; 4603 } 4604 4605 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4606 nvme_completion_poll_cb, status); 4607 if (res) { 4608 free(status); 4609 return res; 4610 } 4611 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4612 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n"); 4613 if (!status->timed_out) { 4614 free(status); 4615 } 4616 return -ENXIO; 4617 } 4618 free(status); 4619 4620 res = nvme_ctrlr_identify_active_ns(ctrlr); 4621 if (res) { 4622 return res; 4623 } 4624 4625 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4626 if (ns == NULL) { 4627 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_get_ns failed!\n"); 4628 return -ENXIO; 4629 } 4630 4631 return nvme_ns_construct(ns, nsid, ctrlr); 4632 } 4633 4634 int 4635 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4636 struct spdk_nvme_ctrlr_list *payload) 4637 { 4638 struct nvme_completion_poll_status *status; 4639 int res; 4640 4641 if (nsid == 0) { 4642 return -EINVAL; 4643 } 4644 4645 status = calloc(1, sizeof(*status)); 4646 if (!status) { 4647 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4648 return -ENOMEM; 4649 } 4650 4651 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4652 nvme_completion_poll_cb, status); 4653 if (res) { 4654 free(status); 4655 return res; 4656 } 4657 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4658 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4659 if (!status->timed_out) { 4660 free(status); 4661 } 4662 return -ENXIO; 4663 } 4664 free(status); 4665 4666 return nvme_ctrlr_identify_active_ns(ctrlr); 4667 } 4668 4669 uint32_t 4670 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4671 { 4672 struct nvme_completion_poll_status *status; 4673 int res; 4674 uint32_t nsid; 4675 4676 status = calloc(1, sizeof(*status)); 4677 if (!status) { 4678 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4679 return 0; 4680 } 4681 4682 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4683 if (res) { 4684 free(status); 4685 return 0; 4686 } 4687 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4688 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4689 if (!status->timed_out) { 4690 free(status); 4691 } 4692 return 0; 4693 } 4694 4695 nsid = status->cpl.cdw0; 4696 free(status); 4697 4698 assert(nsid > 0); 4699 4700 /* Return the namespace ID that was created */ 4701 return nsid; 4702 } 4703 4704 int 4705 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4706 { 4707 struct nvme_completion_poll_status *status; 4708 int res; 4709 4710 if (nsid == 0) { 4711 return -EINVAL; 4712 } 4713 4714 status = calloc(1, sizeof(*status)); 4715 if (!status) { 4716 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4717 return -ENOMEM; 4718 } 4719 4720 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4721 if (res) { 4722 free(status); 4723 return res; 4724 } 4725 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4726 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4727 if (!status->timed_out) { 4728 free(status); 4729 } 4730 return -ENXIO; 4731 } 4732 free(status); 4733 4734 return nvme_ctrlr_identify_active_ns(ctrlr); 4735 } 4736 4737 int 4738 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4739 struct spdk_nvme_format *format) 4740 { 4741 struct nvme_completion_poll_status *status; 4742 int res; 4743 4744 status = calloc(1, sizeof(*status)); 4745 if (!status) { 4746 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4747 return -ENOMEM; 4748 } 4749 4750 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4751 status); 4752 if (res) { 4753 free(status); 4754 return res; 4755 } 4756 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4757 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4758 if (!status->timed_out) { 4759 free(status); 4760 } 4761 return -ENXIO; 4762 } 4763 free(status); 4764 4765 return spdk_nvme_ctrlr_reset(ctrlr); 4766 } 4767 4768 int 4769 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4770 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4771 { 4772 struct spdk_nvme_fw_commit fw_commit; 4773 struct nvme_completion_poll_status *status; 4774 int res; 4775 unsigned int size_remaining; 4776 unsigned int offset; 4777 unsigned int transfer; 4778 void *p; 4779 4780 if (!completion_status) { 4781 return -EINVAL; 4782 } 4783 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4784 if (size % 4) { 4785 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4786 return -1; 4787 } 4788 4789 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4790 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4791 */ 4792 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4793 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4794 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4795 return -1; 4796 } 4797 4798 status = calloc(1, sizeof(*status)); 4799 if (!status) { 4800 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4801 return -ENOMEM; 4802 } 4803 4804 /* Firmware download */ 4805 size_remaining = size; 4806 offset = 0; 4807 p = payload; 4808 4809 while (size_remaining > 0) { 4810 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4811 4812 memset(status, 0, sizeof(*status)); 4813 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4814 nvme_completion_poll_cb, 4815 status); 4816 if (res) { 4817 free(status); 4818 return res; 4819 } 4820 4821 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4822 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4823 if (!status->timed_out) { 4824 free(status); 4825 } 4826 return -ENXIO; 4827 } 4828 p += transfer; 4829 offset += transfer; 4830 size_remaining -= transfer; 4831 } 4832 4833 /* Firmware commit */ 4834 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4835 fw_commit.fs = slot; 4836 fw_commit.ca = commit_action; 4837 4838 memset(status, 0, sizeof(*status)); 4839 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4840 status); 4841 if (res) { 4842 free(status); 4843 return res; 4844 } 4845 4846 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4847 4848 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4849 4850 if (!status->timed_out) { 4851 free(status); 4852 } 4853 4854 if (res) { 4855 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4856 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4857 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 4858 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4859 NVME_CTRLR_NOTICELOG(ctrlr, 4860 "firmware activation requires conventional reset to be performed. !\n"); 4861 } else { 4862 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4863 } 4864 return -ENXIO; 4865 } 4866 } 4867 4868 return spdk_nvme_ctrlr_reset(ctrlr); 4869 } 4870 4871 int 4872 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4873 { 4874 int rc, size; 4875 union spdk_nvme_cmbsz_register cmbsz; 4876 4877 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4878 4879 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4880 return -ENOTSUP; 4881 } 4882 4883 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4884 4885 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4886 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4887 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4888 4889 if (rc < 0) { 4890 return rc; 4891 } 4892 4893 return size; 4894 } 4895 4896 void * 4897 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4898 { 4899 void *buf; 4900 4901 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4902 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4903 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4904 4905 return buf; 4906 } 4907 4908 void 4909 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4910 { 4911 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4912 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4913 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4914 } 4915 4916 int 4917 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4918 { 4919 int rc; 4920 4921 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4922 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4923 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4924 4925 return rc; 4926 } 4927 4928 int 4929 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4930 { 4931 int rc; 4932 4933 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4934 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4935 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4936 4937 return rc; 4938 } 4939 4940 void * 4941 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4942 { 4943 void *buf; 4944 4945 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4946 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4947 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4948 4949 return buf; 4950 } 4951 4952 int 4953 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4954 { 4955 int rc; 4956 4957 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4958 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4959 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4960 4961 return rc; 4962 } 4963 4964 int 4965 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4966 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4967 { 4968 union spdk_nvme_bprsel_register bprsel; 4969 union spdk_nvme_bpinfo_register bpinfo; 4970 uint64_t bpmbl, bpmb_size; 4971 4972 if (ctrlr->cap.bits.bps == 0) { 4973 return -ENOTSUP; 4974 } 4975 4976 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4977 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4978 return -EIO; 4979 } 4980 4981 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 4982 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 4983 return -EALREADY; 4984 } 4985 4986 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4987 4988 bpmb_size = bprsz * 4096; 4989 bpmbl = spdk_vtophys(payload, &bpmb_size); 4990 if (bpmbl == SPDK_VTOPHYS_ERROR) { 4991 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 4992 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4993 return -EFAULT; 4994 } 4995 4996 if (bpmb_size != bprsz * 4096) { 4997 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n"); 4998 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4999 return -EFAULT; 5000 } 5001 5002 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 5003 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 5004 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5005 return -EIO; 5006 } 5007 5008 bprsel.bits.bpid = bpid; 5009 bprsel.bits.bprof = bprof; 5010 bprsel.bits.bprsz = bprsz; 5011 5012 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 5013 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 5014 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5015 return -EIO; 5016 } 5017 5018 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5019 return 0; 5020 } 5021 5022 int 5023 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 5024 { 5025 int rc = 0; 5026 union spdk_nvme_bpinfo_register bpinfo; 5027 5028 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 5029 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 5030 return -EIO; 5031 } 5032 5033 switch (bpinfo.bits.brs) { 5034 case SPDK_NVME_BRS_NO_READ: 5035 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 5036 rc = -EINVAL; 5037 break; 5038 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5039 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5040 rc = -EAGAIN; 5041 break; 5042 case SPDK_NVME_BRS_READ_ERROR: 5043 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5044 rc = -EIO; 5045 break; 5046 case SPDK_NVME_BRS_READ_SUCCESS: 5047 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5048 break; 5049 default: 5050 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5051 rc = -EINVAL; 5052 } 5053 5054 return rc; 5055 } 5056 5057 static void 5058 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5059 { 5060 int res; 5061 struct spdk_nvme_ctrlr *ctrlr = arg; 5062 struct spdk_nvme_fw_commit fw_commit; 5063 struct spdk_nvme_cpl err_cpl = 5064 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5065 5066 if (spdk_nvme_cpl_is_error(cpl)) { 5067 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5068 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5069 return; 5070 } 5071 5072 if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) { 5073 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5074 ctrlr->fw_payload += ctrlr->fw_transfer_size; 5075 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5076 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5077 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5078 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5079 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5080 if (res) { 5081 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5082 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5083 return; 5084 } 5085 5086 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5087 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5088 } 5089 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5090 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5091 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5092 fw_commit.bpid = ctrlr->bpid; 5093 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5094 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5095 nvme_write_boot_partition_cb, ctrlr); 5096 if (res) { 5097 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5098 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5099 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5100 return; 5101 } 5102 5103 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5104 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5105 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5106 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5107 fw_commit.bpid = ctrlr->bpid; 5108 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5109 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5110 nvme_write_boot_partition_cb, ctrlr); 5111 if (res) { 5112 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5113 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5114 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5115 return; 5116 } 5117 5118 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5119 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5120 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5121 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5122 } else { 5123 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5124 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5125 return; 5126 } 5127 } 5128 5129 int 5130 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5131 void *payload, uint32_t size, uint32_t bpid, 5132 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5133 { 5134 int res; 5135 5136 if (ctrlr->cap.bits.bps == 0) { 5137 return -ENOTSUP; 5138 } 5139 5140 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5141 ctrlr->bpid = bpid; 5142 ctrlr->bp_write_cb_fn = cb_fn; 5143 ctrlr->bp_write_cb_arg = cb_arg; 5144 ctrlr->fw_offset = 0; 5145 ctrlr->fw_size_remaining = size; 5146 ctrlr->fw_payload = payload; 5147 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5148 5149 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5150 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5151 5152 return res; 5153 } 5154 5155 bool 5156 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5157 { 5158 assert(ctrlr); 5159 5160 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5161 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5162 } 5163 5164 bool 5165 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5166 { 5167 assert(ctrlr); 5168 5169 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5170 } 5171 5172 int 5173 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5174 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5175 { 5176 struct nvme_completion_poll_status *status; 5177 int res; 5178 5179 status = calloc(1, sizeof(*status)); 5180 if (!status) { 5181 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5182 return -ENOMEM; 5183 } 5184 5185 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5186 nvme_completion_poll_cb, status); 5187 if (res) { 5188 free(status); 5189 return res; 5190 } 5191 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5192 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5193 if (!status->timed_out) { 5194 free(status); 5195 } 5196 return -ENXIO; 5197 } 5198 free(status); 5199 5200 return 0; 5201 } 5202 5203 int 5204 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5205 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5206 { 5207 struct nvme_completion_poll_status *status; 5208 int res; 5209 5210 status = calloc(1, sizeof(*status)); 5211 if (!status) { 5212 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5213 return -ENOMEM; 5214 } 5215 5216 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5217 nvme_completion_poll_cb, 5218 status); 5219 if (res) { 5220 free(status); 5221 return res; 5222 } 5223 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5224 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5225 if (!status->timed_out) { 5226 free(status); 5227 } 5228 return -ENXIO; 5229 } 5230 5231 free(status); 5232 5233 return 0; 5234 } 5235 5236 uint64_t 5237 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5238 { 5239 return ctrlr->flags; 5240 } 5241 5242 const struct spdk_nvme_transport_id * 5243 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5244 { 5245 return &ctrlr->trid; 5246 } 5247 5248 int32_t 5249 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5250 { 5251 uint32_t qid; 5252 5253 assert(ctrlr->free_io_qids); 5254 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5255 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5256 if (qid > ctrlr->opts.num_io_queues) { 5257 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5258 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5259 return -1; 5260 } 5261 5262 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5263 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5264 return qid; 5265 } 5266 5267 void 5268 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5269 { 5270 assert(qid <= ctrlr->opts.num_io_queues); 5271 5272 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5273 5274 if (spdk_likely(ctrlr->free_io_qids)) { 5275 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5276 } 5277 5278 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5279 } 5280 5281 int 5282 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5283 struct spdk_memory_domain **domains, int array_size) 5284 { 5285 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5286 } 5287