1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2015 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "nvme_internal.h" 10 #include "nvme_io_msg.h" 11 12 #include "spdk/env.h" 13 #include "spdk/string.h" 14 #include "spdk/endian.h" 15 16 struct nvme_active_ns_ctx; 17 18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 19 struct nvme_async_event_request *aer); 20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx); 21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); 22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns); 23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); 24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr); 25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 26 uint64_t timeout_in_ms); 27 28 static int 29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2) 30 { 31 if (ns1->id < ns2->id) { 32 return -1; 33 } else if (ns1->id > ns2->id) { 34 return 1; 35 } else { 36 return 0; 37 } 38 } 39 40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp); 41 42 #define CTRLR_STRING(ctrlr) \ 43 ((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \ 44 ctrlr->trid.subnqn : ctrlr->trid.traddr) 45 46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \ 47 SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 48 49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \ 50 SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 51 52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \ 53 SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 54 55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \ 56 SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 57 58 #ifdef DEBUG 59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \ 60 SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 61 #else 62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0) 63 #endif 64 65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \ 66 nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \ 67 offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg) 68 69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \ 70 nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \ 71 offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg) 72 73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \ 74 nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg) 75 76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \ 77 nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg) 78 79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \ 80 nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg) 81 82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \ 83 nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg) 84 85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \ 86 nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg) 87 88 static int 89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 90 { 91 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 92 &cc->raw); 93 } 94 95 static int 96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 97 { 98 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 99 &csts->raw); 100 } 101 102 int 103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 104 { 105 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 106 &cap->raw); 107 } 108 109 int 110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 111 { 112 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 113 &vs->raw); 114 } 115 116 int 117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz) 118 { 119 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 120 &cmbsz->raw); 121 } 122 123 int 124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap) 125 { 126 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 127 &pmrcap->raw); 128 } 129 130 int 131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo) 132 { 133 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw), 134 &bpinfo->raw); 135 } 136 137 int 138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel) 139 { 140 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw), 141 bprsel->raw); 142 } 143 144 int 145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value) 146 { 147 return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl), 148 bpmbl_value); 149 } 150 151 static int 152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value) 153 { 154 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr), 155 nssr_value); 156 } 157 158 bool 159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr) 160 { 161 return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS && 162 ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS; 163 } 164 165 /* When the field in spdk_nvme_ctrlr_opts are changed and you change this function, please 166 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c 167 */ 168 void 169 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 170 { 171 char host_id_str[SPDK_UUID_STRING_LEN]; 172 173 assert(opts); 174 175 opts->opts_size = opts_size; 176 177 #define FIELD_OK(field) \ 178 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 179 180 #define SET_FIELD(field, value) \ 181 if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \ 182 opts->field = value; \ 183 } \ 184 185 SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES); 186 SET_FIELD(use_cmb_sqs, false); 187 SET_FIELD(no_shn_notification, false); 188 SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR); 189 SET_FIELD(arbitration_burst, 0); 190 SET_FIELD(low_priority_weight, 0); 191 SET_FIELD(medium_priority_weight, 0); 192 SET_FIELD(high_priority_weight, 0); 193 SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS); 194 SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT); 195 SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE); 196 197 if (nvme_driver_init() == 0) { 198 if (FIELD_OK(hostnqn)) { 199 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 200 &g_spdk_nvme_driver->default_extended_host_id); 201 snprintf(opts->hostnqn, sizeof(opts->hostnqn), 202 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str); 203 } 204 205 if (FIELD_OK(extended_host_id)) { 206 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 207 sizeof(opts->extended_host_id)); 208 } 209 210 } 211 212 SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS); 213 214 if (FIELD_OK(src_addr)) { 215 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 216 } 217 218 if (FIELD_OK(src_svcid)) { 219 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 220 } 221 222 if (FIELD_OK(host_id)) { 223 memset(opts->host_id, 0, sizeof(opts->host_id)); 224 } 225 226 SET_FIELD(command_set, CHAR_BIT); 227 SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000); 228 SET_FIELD(header_digest, false); 229 SET_FIELD(data_digest, false); 230 SET_FIELD(disable_error_logging, false); 231 SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT); 232 SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE); 233 SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT); 234 SET_FIELD(disable_read_ana_log_page, false); 235 SET_FIELD(disable_read_changed_ns_list_log_page, false); 236 237 if (FIELD_OK(psk)) { 238 memset(opts->psk, 0, sizeof(opts->psk)); 239 } 240 241 #undef FIELD_OK 242 #undef SET_FIELD 243 } 244 245 const struct spdk_nvme_ctrlr_opts * 246 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) 247 { 248 return &ctrlr->opts; 249 } 250 251 /** 252 * This function will be called when the process allocates the IO qpair. 253 * Note: the ctrlr_lock must be held when calling this function. 254 */ 255 static void 256 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 257 { 258 struct spdk_nvme_ctrlr_process *active_proc; 259 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 260 261 active_proc = nvme_ctrlr_get_current_process(ctrlr); 262 if (active_proc) { 263 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 264 qpair->active_proc = active_proc; 265 } 266 } 267 268 /** 269 * This function will be called when the process frees the IO qpair. 270 * Note: the ctrlr_lock must be held when calling this function. 271 */ 272 static void 273 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 274 { 275 struct spdk_nvme_ctrlr_process *active_proc; 276 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 277 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 278 279 active_proc = nvme_ctrlr_get_current_process(ctrlr); 280 if (!active_proc) { 281 return; 282 } 283 284 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 285 per_process_tailq, tmp_qpair) { 286 if (active_qpair == qpair) { 287 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 288 active_qpair, per_process_tailq); 289 290 break; 291 } 292 } 293 } 294 295 void 296 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 297 struct spdk_nvme_io_qpair_opts *opts, 298 size_t opts_size) 299 { 300 assert(ctrlr); 301 302 assert(opts); 303 304 memset(opts, 0, opts_size); 305 306 #define FIELD_OK(field) \ 307 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 308 309 if (FIELD_OK(qprio)) { 310 opts->qprio = SPDK_NVME_QPRIO_URGENT; 311 } 312 313 if (FIELD_OK(io_queue_size)) { 314 opts->io_queue_size = ctrlr->opts.io_queue_size; 315 } 316 317 if (FIELD_OK(io_queue_requests)) { 318 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 319 } 320 321 if (FIELD_OK(delay_cmd_submit)) { 322 opts->delay_cmd_submit = false; 323 } 324 325 if (FIELD_OK(sq.vaddr)) { 326 opts->sq.vaddr = NULL; 327 } 328 329 if (FIELD_OK(sq.paddr)) { 330 opts->sq.paddr = 0; 331 } 332 333 if (FIELD_OK(sq.buffer_size)) { 334 opts->sq.buffer_size = 0; 335 } 336 337 if (FIELD_OK(cq.vaddr)) { 338 opts->cq.vaddr = NULL; 339 } 340 341 if (FIELD_OK(cq.paddr)) { 342 opts->cq.paddr = 0; 343 } 344 345 if (FIELD_OK(cq.buffer_size)) { 346 opts->cq.buffer_size = 0; 347 } 348 349 if (FIELD_OK(create_only)) { 350 opts->create_only = false; 351 } 352 353 if (FIELD_OK(async_mode)) { 354 opts->async_mode = false; 355 } 356 357 #undef FIELD_OK 358 } 359 360 static struct spdk_nvme_qpair * 361 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 362 const struct spdk_nvme_io_qpair_opts *opts) 363 { 364 int32_t qid; 365 struct spdk_nvme_qpair *qpair; 366 union spdk_nvme_cc_register cc; 367 368 if (!ctrlr) { 369 return NULL; 370 } 371 372 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 373 cc.raw = ctrlr->process_init_cc.raw; 374 375 if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) { 376 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 377 return NULL; 378 } 379 380 /* 381 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 382 * default round robin arbitration method. 383 */ 384 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) { 385 NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n"); 386 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 387 return NULL; 388 } 389 390 qid = spdk_nvme_ctrlr_alloc_qid(ctrlr); 391 if (qid < 0) { 392 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 393 return NULL; 394 } 395 396 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts); 397 if (qpair == NULL) { 398 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n"); 399 spdk_nvme_ctrlr_free_qid(ctrlr, qid); 400 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 401 return NULL; 402 } 403 404 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 405 406 nvme_ctrlr_proc_add_io_qpair(qpair); 407 408 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 409 410 return qpair; 411 } 412 413 int 414 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 415 { 416 int rc; 417 418 if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) { 419 return -EISCONN; 420 } 421 422 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 423 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 424 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 425 426 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 427 spdk_delay_us(100); 428 } 429 430 return rc; 431 } 432 433 void 434 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair) 435 { 436 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 437 438 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 439 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 440 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 441 } 442 443 struct spdk_nvme_qpair * 444 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 445 const struct spdk_nvme_io_qpair_opts *user_opts, 446 size_t opts_size) 447 { 448 449 struct spdk_nvme_qpair *qpair; 450 struct spdk_nvme_io_qpair_opts opts; 451 int rc; 452 453 if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) { 454 /* When controller is resetting or initializing, free_io_qids is deleted or not created yet. 455 * We can't create IO qpair in that case */ 456 return NULL; 457 } 458 459 /* 460 * Get the default options, then overwrite them with the user-provided options 461 * up to opts_size. 462 * 463 * This allows for extensions of the opts structure without breaking 464 * ABI compatibility. 465 */ 466 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 467 if (user_opts) { 468 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 469 470 /* If user passes buffers, make sure they're big enough for the requested queue size */ 471 if (opts.sq.vaddr) { 472 if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) { 473 NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n", 474 opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))); 475 return NULL; 476 } 477 } 478 if (opts.cq.vaddr) { 479 if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) { 480 NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n", 481 opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))); 482 return NULL; 483 } 484 } 485 } 486 487 qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts); 488 489 if (qpair == NULL || opts.create_only == true) { 490 return qpair; 491 } 492 493 rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair); 494 if (rc != 0) { 495 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n"); 496 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 497 nvme_ctrlr_proc_remove_io_qpair(qpair); 498 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 499 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 500 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 501 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 502 return NULL; 503 } 504 505 return qpair; 506 } 507 508 int 509 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) 510 { 511 struct spdk_nvme_ctrlr *ctrlr; 512 enum nvme_qpair_state qpair_state; 513 int rc; 514 515 assert(qpair != NULL); 516 assert(nvme_qpair_is_admin_queue(qpair) == false); 517 assert(qpair->ctrlr != NULL); 518 519 ctrlr = qpair->ctrlr; 520 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 521 qpair_state = nvme_qpair_get_state(qpair); 522 523 if (ctrlr->is_removed) { 524 rc = -ENODEV; 525 goto out; 526 } 527 528 if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) { 529 rc = -EAGAIN; 530 goto out; 531 } 532 533 if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) { 534 rc = -ENXIO; 535 goto out; 536 } 537 538 if (qpair_state != NVME_QPAIR_DISCONNECTED) { 539 rc = 0; 540 goto out; 541 } 542 543 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 544 if (rc) { 545 rc = -EAGAIN; 546 goto out; 547 } 548 549 out: 550 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 551 return rc; 552 } 553 554 spdk_nvme_qp_failure_reason 555 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr) 556 { 557 return ctrlr->adminq->transport_failure_reason; 558 } 559 560 /* 561 * This internal function will attempt to take the controller 562 * lock before calling disconnect on a controller qpair. 563 * Functions already holding the controller lock should 564 * call nvme_transport_ctrlr_disconnect_qpair directly. 565 */ 566 void 567 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) 568 { 569 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 570 571 assert(ctrlr != NULL); 572 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 573 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 574 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 575 } 576 577 int 578 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 579 { 580 struct spdk_nvme_ctrlr *ctrlr; 581 582 if (qpair == NULL) { 583 return 0; 584 } 585 586 ctrlr = qpair->ctrlr; 587 588 if (qpair->in_completion_context) { 589 /* 590 * There are many cases where it is convenient to delete an io qpair in the context 591 * of that qpair's completion routine. To handle this properly, set a flag here 592 * so that the completion routine will perform an actual delete after the context 593 * unwinds. 594 */ 595 qpair->delete_after_completion_context = 1; 596 return 0; 597 } 598 599 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 600 601 if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) { 602 spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair); 603 } 604 605 /* Do not retry. */ 606 nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING); 607 608 /* In the multi-process case, a process may call this function on a foreign 609 * I/O qpair (i.e. one that this process did not create) when that qpairs process 610 * exits unexpectedly. In that case, we must not try to abort any reqs associated 611 * with that qpair, since the callbacks will also be foreign to this process. 612 */ 613 if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 614 nvme_qpair_abort_all_queued_reqs(qpair, 0); 615 } 616 617 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 618 619 nvme_ctrlr_proc_remove_io_qpair(qpair); 620 621 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 622 spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id); 623 624 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 625 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 626 return 0; 627 } 628 629 static void 630 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 631 struct spdk_nvme_intel_log_page_directory *log_page_directory) 632 { 633 if (log_page_directory == NULL) { 634 return; 635 } 636 637 assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL); 638 639 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 640 641 if (log_page_directory->read_latency_log_len || 642 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 643 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 644 } 645 if (log_page_directory->write_latency_log_len || 646 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 647 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 648 } 649 if (log_page_directory->temperature_statistics_log_len) { 650 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 651 } 652 if (log_page_directory->smart_log_len) { 653 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 654 } 655 if (log_page_directory->marketing_description_log_len) { 656 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 657 } 658 } 659 660 struct intel_log_pages_ctx { 661 struct spdk_nvme_intel_log_page_directory log_page_directory; 662 struct spdk_nvme_ctrlr *ctrlr; 663 }; 664 665 static void 666 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl) 667 { 668 struct intel_log_pages_ctx *ctx = arg; 669 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 670 671 if (!spdk_nvme_cpl_is_error(cpl)) { 672 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory); 673 } 674 675 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 676 ctrlr->opts.admin_timeout_ms); 677 free(ctx); 678 } 679 680 static int 681 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 682 { 683 int rc = 0; 684 struct intel_log_pages_ctx *ctx; 685 686 ctx = calloc(1, sizeof(*ctx)); 687 if (!ctx) { 688 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 689 ctrlr->opts.admin_timeout_ms); 690 return 0; 691 } 692 693 ctx->ctrlr = ctrlr; 694 695 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, 696 SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory, 697 sizeof(struct spdk_nvme_intel_log_page_directory), 698 0, nvme_ctrlr_set_intel_support_log_pages_done, ctx); 699 if (rc != 0) { 700 free(ctx); 701 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 702 ctrlr->opts.admin_timeout_ms); 703 return 0; 704 } 705 706 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES, 707 ctrlr->opts.admin_timeout_ms); 708 709 return 0; 710 } 711 712 static int 713 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 714 { 715 uint32_t ana_log_page_size; 716 717 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid * 718 sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count * 719 sizeof(uint32_t); 720 721 /* Number of active namespaces may have changed. 722 * Check if ANA log page fits into existing buffer. 723 */ 724 if (ana_log_page_size > ctrlr->ana_log_page_size) { 725 void *new_buffer; 726 727 if (ctrlr->ana_log_page) { 728 new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size); 729 } else { 730 new_buffer = calloc(1, ana_log_page_size); 731 } 732 733 if (!new_buffer) { 734 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n", 735 ana_log_page_size); 736 return -ENXIO; 737 } 738 739 ctrlr->ana_log_page = new_buffer; 740 if (ctrlr->copied_ana_desc) { 741 new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size); 742 } else { 743 new_buffer = calloc(1, ana_log_page_size); 744 } 745 746 if (!new_buffer) { 747 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n", 748 ana_log_page_size); 749 return -ENOMEM; 750 } 751 752 ctrlr->copied_ana_desc = new_buffer; 753 ctrlr->ana_log_page_size = ana_log_page_size; 754 } 755 756 return 0; 757 } 758 759 static int 760 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 761 { 762 struct nvme_completion_poll_status *status; 763 int rc; 764 765 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 766 if (rc != 0) { 767 return rc; 768 } 769 770 status = calloc(1, sizeof(*status)); 771 if (status == NULL) { 772 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 773 return -ENOMEM; 774 } 775 776 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 777 SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page, 778 ctrlr->ana_log_page_size, 0, 779 nvme_completion_poll_cb, status); 780 if (rc != 0) { 781 free(status); 782 return rc; 783 } 784 785 if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock, 786 ctrlr->opts.admin_timeout_ms * 1000)) { 787 if (!status->timed_out) { 788 free(status); 789 } 790 return -EIO; 791 } 792 793 free(status); 794 return 0; 795 } 796 797 static int 798 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 799 void *cb_arg) 800 { 801 struct spdk_nvme_ctrlr *ctrlr = cb_arg; 802 struct spdk_nvme_ns *ns; 803 uint32_t i, nsid; 804 805 for (i = 0; i < desc->num_of_nsid; i++) { 806 nsid = desc->nsid[i]; 807 if (nsid == 0 || nsid > ctrlr->cdata.nn) { 808 continue; 809 } 810 811 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 812 assert(ns != NULL); 813 814 ns->ana_group_id = desc->ana_group_id; 815 ns->ana_state = desc->ana_state; 816 } 817 818 return 0; 819 } 820 821 int 822 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, 823 spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg) 824 { 825 struct spdk_nvme_ana_group_descriptor *copied_desc; 826 uint8_t *orig_desc; 827 uint32_t i, desc_size, copy_len; 828 int rc = 0; 829 830 if (ctrlr->ana_log_page == NULL) { 831 return -EINVAL; 832 } 833 834 copied_desc = ctrlr->copied_ana_desc; 835 836 orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page); 837 copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page); 838 839 for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) { 840 memcpy(copied_desc, orig_desc, copy_len); 841 842 rc = cb_fn(copied_desc, cb_arg); 843 if (rc != 0) { 844 break; 845 } 846 847 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) + 848 copied_desc->num_of_nsid * sizeof(uint32_t); 849 orig_desc += desc_size; 850 copy_len -= desc_size; 851 } 852 853 return rc; 854 } 855 856 static int 857 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 858 { 859 int rc = 0; 860 861 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 862 /* Mandatory pages */ 863 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 864 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 865 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 866 if (ctrlr->cdata.lpa.celp) { 867 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 868 } 869 870 if (ctrlr->cdata.cmic.ana_reporting) { 871 ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true; 872 if (!ctrlr->opts.disable_read_ana_log_page) { 873 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 874 if (rc == 0) { 875 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 876 ctrlr); 877 } 878 } 879 } 880 881 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && 882 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE && 883 !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { 884 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES, 885 ctrlr->opts.admin_timeout_ms); 886 887 } else { 888 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 889 ctrlr->opts.admin_timeout_ms); 890 891 } 892 893 return rc; 894 } 895 896 static void 897 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 898 { 899 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 900 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 901 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 902 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 903 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 904 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 905 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 906 } 907 908 static void 909 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr) 910 { 911 uint32_t cdw11; 912 struct nvme_completion_poll_status *status; 913 914 if (ctrlr->opts.arbitration_burst == 0) { 915 return; 916 } 917 918 if (ctrlr->opts.arbitration_burst > 7) { 919 NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values is from 0-7\n"); 920 return; 921 } 922 923 status = calloc(1, sizeof(*status)); 924 if (!status) { 925 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 926 return; 927 } 928 929 cdw11 = ctrlr->opts.arbitration_burst; 930 931 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) { 932 cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8; 933 cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16; 934 cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24; 935 } 936 937 if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION, 938 cdw11, 0, NULL, 0, 939 nvme_completion_poll_cb, status) < 0) { 940 NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n"); 941 free(status); 942 return; 943 } 944 945 if (nvme_wait_for_completion_timeout(ctrlr->adminq, status, 946 ctrlr->opts.admin_timeout_ms * 1000)) { 947 NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n"); 948 } 949 950 if (!status->timed_out) { 951 free(status); 952 } 953 } 954 955 static void 956 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 957 { 958 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 959 /* Mandatory features */ 960 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 961 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 962 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 963 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 964 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 965 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 966 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 967 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 968 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 969 /* Optional features */ 970 if (ctrlr->cdata.vwc.present) { 971 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 972 } 973 if (ctrlr->cdata.apsta.supported) { 974 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 975 } 976 if (ctrlr->cdata.hmpre) { 977 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 978 } 979 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 980 nvme_ctrlr_set_intel_supported_features(ctrlr); 981 } 982 983 nvme_ctrlr_set_arbitration_feature(ctrlr); 984 } 985 986 bool 987 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr) 988 { 989 return ctrlr->is_failed; 990 } 991 992 void 993 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 994 { 995 /* 996 * Set the flag here and leave the work failure of qpairs to 997 * spdk_nvme_qpair_process_completions(). 998 */ 999 if (hot_remove) { 1000 ctrlr->is_removed = true; 1001 } 1002 1003 if (ctrlr->is_failed) { 1004 NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n"); 1005 return; 1006 } 1007 1008 if (ctrlr->is_disconnecting) { 1009 NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n"); 1010 return; 1011 } 1012 1013 ctrlr->is_failed = true; 1014 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1015 NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n"); 1016 } 1017 1018 /** 1019 * This public API function will try to take the controller lock. 1020 * Any private functions being called from a thread already holding 1021 * the ctrlr lock should call nvme_ctrlr_fail directly. 1022 */ 1023 void 1024 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) 1025 { 1026 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1027 nvme_ctrlr_fail(ctrlr, false); 1028 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1029 } 1030 1031 static void 1032 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1033 { 1034 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1035 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1036 1037 if (spdk_nvme_cpl_is_error(cpl)) { 1038 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1039 ctx->shutdown_complete = true; 1040 return; 1041 } 1042 1043 if (ctrlr->opts.no_shn_notification) { 1044 ctx->shutdown_complete = true; 1045 return; 1046 } 1047 1048 /* 1049 * The NVMe specification defines RTD3E to be the time between 1050 * setting SHN = 1 until the controller will set SHST = 10b. 1051 * If the device doesn't report RTD3 entry latency, or if it 1052 * reports RTD3 entry latency less than 10 seconds, pick 1053 * 10 seconds as a reasonable amount of time to 1054 * wait before proceeding. 1055 */ 1056 NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 1057 ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); 1058 ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); 1059 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms); 1060 1061 ctx->shutdown_start_tsc = spdk_get_ticks(); 1062 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1063 } 1064 1065 static void 1066 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1067 { 1068 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1069 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1070 union spdk_nvme_cc_register cc; 1071 int rc; 1072 1073 if (spdk_nvme_cpl_is_error(cpl)) { 1074 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1075 ctx->shutdown_complete = true; 1076 return; 1077 } 1078 1079 assert(value <= UINT32_MAX); 1080 cc.raw = (uint32_t)value; 1081 1082 if (ctrlr->opts.no_shn_notification) { 1083 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 1084 if (cc.bits.en == 0) { 1085 ctx->shutdown_complete = true; 1086 return; 1087 } 1088 1089 cc.bits.en = 0; 1090 } else { 1091 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 1092 } 1093 1094 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx); 1095 if (rc != 0) { 1096 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1097 ctx->shutdown_complete = true; 1098 } 1099 } 1100 1101 static void 1102 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, 1103 struct nvme_ctrlr_detach_ctx *ctx) 1104 { 1105 int rc; 1106 1107 if (ctrlr->is_removed) { 1108 ctx->shutdown_complete = true; 1109 return; 1110 } 1111 1112 ctx->state = NVME_CTRLR_DETACH_SET_CC; 1113 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx); 1114 if (rc != 0) { 1115 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1116 ctx->shutdown_complete = true; 1117 } 1118 } 1119 1120 static void 1121 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1122 { 1123 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1124 1125 if (spdk_nvme_cpl_is_error(cpl)) { 1126 NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n"); 1127 ctx->shutdown_complete = true; 1128 return; 1129 } 1130 1131 assert(value <= UINT32_MAX); 1132 ctx->csts.raw = (uint32_t)value; 1133 ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE; 1134 } 1135 1136 static int 1137 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, 1138 struct nvme_ctrlr_detach_ctx *ctx) 1139 { 1140 union spdk_nvme_csts_register csts; 1141 uint32_t ms_waited; 1142 1143 switch (ctx->state) { 1144 case NVME_CTRLR_DETACH_SET_CC: 1145 case NVME_CTRLR_DETACH_GET_CSTS: 1146 /* We're still waiting for the register operation to complete */ 1147 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1148 return -EAGAIN; 1149 1150 case NVME_CTRLR_DETACH_CHECK_CSTS: 1151 ctx->state = NVME_CTRLR_DETACH_GET_CSTS; 1152 if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) { 1153 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 1154 return -EIO; 1155 } 1156 return -EAGAIN; 1157 1158 case NVME_CTRLR_DETACH_GET_CSTS_DONE: 1159 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1160 break; 1161 1162 default: 1163 assert(0 && "Should never happen"); 1164 return -EINVAL; 1165 } 1166 1167 ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); 1168 csts.raw = ctx->csts.raw; 1169 1170 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 1171 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited); 1172 return 0; 1173 } 1174 1175 if (ms_waited < ctx->shutdown_timeout_ms) { 1176 return -EAGAIN; 1177 } 1178 1179 NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n", 1180 ctx->shutdown_timeout_ms); 1181 if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { 1182 NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n"); 1183 } 1184 1185 return 0; 1186 } 1187 1188 static inline uint64_t 1189 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr) 1190 { 1191 return ctrlr->cap.bits.to * 500; 1192 } 1193 1194 static void 1195 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1196 { 1197 struct spdk_nvme_ctrlr *ctrlr = ctx; 1198 1199 if (spdk_nvme_cpl_is_error(cpl)) { 1200 NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n"); 1201 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1202 return; 1203 } 1204 1205 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 1206 nvme_ctrlr_get_ready_timeout(ctrlr)); 1207 } 1208 1209 static int 1210 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 1211 { 1212 union spdk_nvme_cc_register cc; 1213 int rc; 1214 1215 rc = nvme_transport_ctrlr_enable(ctrlr); 1216 if (rc != 0) { 1217 NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n"); 1218 return rc; 1219 } 1220 1221 cc.raw = ctrlr->process_init_cc.raw; 1222 if (cc.bits.en != 0) { 1223 NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n"); 1224 return -EINVAL; 1225 } 1226 1227 cc.bits.en = 1; 1228 cc.bits.css = 0; 1229 cc.bits.shn = 0; 1230 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 1231 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 1232 1233 /* Page size is 2 ^ (12 + mps). */ 1234 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 1235 1236 /* 1237 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS. 1238 * A controller that does not have any bit set in CAP.CSS is not spec compliant. 1239 * Try to support such a controller regardless. 1240 */ 1241 if (ctrlr->cap.bits.css == 0) { 1242 NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n"); 1243 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 1244 } 1245 1246 /* 1247 * If the user did not explicitly request a command set, or supplied a value larger than 1248 * what can be saved in CC.CSS, use the most reasonable default. 1249 */ 1250 if (ctrlr->opts.command_set >= CHAR_BIT) { 1251 if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) { 1252 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS; 1253 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) { 1254 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1255 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) { 1256 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO; 1257 } else { 1258 /* Invalid supported bits detected, falling back to NVM. */ 1259 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1260 } 1261 } 1262 1263 /* Verify that the selected command set is supported by the controller. */ 1264 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 1265 NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n", 1266 ctrlr->opts.command_set, ctrlr->cap.bits.css); 1267 NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n"); 1268 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1269 } 1270 1271 cc.bits.css = ctrlr->opts.command_set; 1272 1273 switch (ctrlr->opts.arb_mechanism) { 1274 case SPDK_NVME_CC_AMS_RR: 1275 break; 1276 case SPDK_NVME_CC_AMS_WRR: 1277 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 1278 break; 1279 } 1280 return -EINVAL; 1281 case SPDK_NVME_CC_AMS_VS: 1282 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 1283 break; 1284 } 1285 return -EINVAL; 1286 default: 1287 return -EINVAL; 1288 } 1289 1290 cc.bits.ams = ctrlr->opts.arb_mechanism; 1291 ctrlr->process_init_cc.raw = cc.raw; 1292 1293 if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) { 1294 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 1295 return -EIO; 1296 } 1297 1298 return 0; 1299 } 1300 1301 static const char * 1302 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 1303 { 1304 switch (state) { 1305 case NVME_CTRLR_STATE_INIT_DELAY: 1306 return "delay init"; 1307 case NVME_CTRLR_STATE_CONNECT_ADMINQ: 1308 return "connect adminq"; 1309 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 1310 return "wait for connect adminq"; 1311 case NVME_CTRLR_STATE_READ_VS: 1312 return "read vs"; 1313 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 1314 return "read vs wait for vs"; 1315 case NVME_CTRLR_STATE_READ_CAP: 1316 return "read cap"; 1317 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 1318 return "read cap wait for cap"; 1319 case NVME_CTRLR_STATE_CHECK_EN: 1320 return "check en"; 1321 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 1322 return "check en wait for cc"; 1323 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1324 return "disable and wait for CSTS.RDY = 1"; 1325 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1326 return "disable and wait for CSTS.RDY = 1 reg"; 1327 case NVME_CTRLR_STATE_SET_EN_0: 1328 return "set CC.EN = 0"; 1329 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 1330 return "set CC.EN = 0 wait for cc"; 1331 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1332 return "disable and wait for CSTS.RDY = 0"; 1333 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 1334 return "disable and wait for CSTS.RDY = 0 reg"; 1335 case NVME_CTRLR_STATE_DISABLED: 1336 return "controller is disabled"; 1337 case NVME_CTRLR_STATE_ENABLE: 1338 return "enable controller by writing CC.EN = 1"; 1339 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 1340 return "enable controller by writing CC.EN = 1 reg"; 1341 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1342 return "wait for CSTS.RDY = 1"; 1343 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1344 return "wait for CSTS.RDY = 1 reg"; 1345 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 1346 return "reset admin queue"; 1347 case NVME_CTRLR_STATE_IDENTIFY: 1348 return "identify controller"; 1349 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 1350 return "wait for identify controller"; 1351 case NVME_CTRLR_STATE_CONFIGURE_AER: 1352 return "configure AER"; 1353 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 1354 return "wait for configure aer"; 1355 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 1356 return "set keep alive timeout"; 1357 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 1358 return "wait for set keep alive timeout"; 1359 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 1360 return "identify controller iocs specific"; 1361 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 1362 return "wait for identify controller iocs specific"; 1363 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 1364 return "get zns cmd and effects log page"; 1365 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 1366 return "wait for get zns cmd and effects log page"; 1367 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 1368 return "set number of queues"; 1369 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 1370 return "wait for set number of queues"; 1371 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 1372 return "identify active ns"; 1373 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 1374 return "wait for identify active ns"; 1375 case NVME_CTRLR_STATE_IDENTIFY_NS: 1376 return "identify ns"; 1377 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 1378 return "wait for identify ns"; 1379 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 1380 return "identify namespace id descriptors"; 1381 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 1382 return "wait for identify namespace id descriptors"; 1383 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 1384 return "identify ns iocs specific"; 1385 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 1386 return "wait for identify ns iocs specific"; 1387 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 1388 return "set supported log pages"; 1389 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 1390 return "set supported INTEL log pages"; 1391 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 1392 return "wait for supported INTEL log pages"; 1393 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 1394 return "set supported features"; 1395 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 1396 return "set doorbell buffer config"; 1397 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 1398 return "wait for doorbell buffer config"; 1399 case NVME_CTRLR_STATE_SET_HOST_ID: 1400 return "set host ID"; 1401 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 1402 return "wait for set host ID"; 1403 case NVME_CTRLR_STATE_TRANSPORT_READY: 1404 return "transport ready"; 1405 case NVME_CTRLR_STATE_READY: 1406 return "ready"; 1407 case NVME_CTRLR_STATE_ERROR: 1408 return "error"; 1409 } 1410 return "unknown"; 1411 }; 1412 1413 static void 1414 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1415 uint64_t timeout_in_ms, bool quiet) 1416 { 1417 uint64_t ticks_per_ms, timeout_in_ticks, now_ticks; 1418 1419 ctrlr->state = state; 1420 if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) { 1421 if (!quiet) { 1422 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n", 1423 nvme_ctrlr_state_string(ctrlr->state)); 1424 } 1425 return; 1426 } 1427 1428 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 1429 goto inf; 1430 } 1431 1432 ticks_per_ms = spdk_get_ticks_hz() / 1000; 1433 if (timeout_in_ms > UINT64_MAX / ticks_per_ms) { 1434 NVME_CTRLR_ERRLOG(ctrlr, 1435 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1436 goto inf; 1437 } 1438 1439 now_ticks = spdk_get_ticks(); 1440 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 1441 if (timeout_in_ticks > UINT64_MAX - now_ticks) { 1442 NVME_CTRLR_ERRLOG(ctrlr, 1443 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1444 goto inf; 1445 } 1446 1447 ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; 1448 if (!quiet) { 1449 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n", 1450 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 1451 } 1452 return; 1453 inf: 1454 if (!quiet) { 1455 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n", 1456 nvme_ctrlr_state_string(ctrlr->state)); 1457 } 1458 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 1459 } 1460 1461 static void 1462 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1463 uint64_t timeout_in_ms) 1464 { 1465 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false); 1466 } 1467 1468 static void 1469 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1470 uint64_t timeout_in_ms) 1471 { 1472 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true); 1473 } 1474 1475 static void 1476 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1477 { 1478 spdk_free(ctrlr->cdata_zns); 1479 ctrlr->cdata_zns = NULL; 1480 } 1481 1482 static void 1483 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1484 { 1485 nvme_ctrlr_free_zns_specific_data(ctrlr); 1486 } 1487 1488 static void 1489 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 1490 { 1491 if (ctrlr->shadow_doorbell) { 1492 spdk_free(ctrlr->shadow_doorbell); 1493 ctrlr->shadow_doorbell = NULL; 1494 } 1495 1496 if (ctrlr->eventidx) { 1497 spdk_free(ctrlr->eventidx); 1498 ctrlr->eventidx = NULL; 1499 } 1500 } 1501 1502 static void 1503 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) 1504 { 1505 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1506 1507 if (spdk_nvme_cpl_is_error(cpl)) { 1508 NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n"); 1509 } else { 1510 NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n"); 1511 } 1512 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1513 ctrlr->opts.admin_timeout_ms); 1514 } 1515 1516 static int 1517 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 1518 { 1519 int rc = 0; 1520 uint64_t prp1, prp2, len; 1521 1522 if (!ctrlr->cdata.oacs.doorbell_buffer_config) { 1523 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1524 ctrlr->opts.admin_timeout_ms); 1525 return 0; 1526 } 1527 1528 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 1529 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1530 ctrlr->opts.admin_timeout_ms); 1531 return 0; 1532 } 1533 1534 /* only 1 page size for doorbell buffer */ 1535 ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1536 NULL, SPDK_ENV_LCORE_ID_ANY, 1537 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1538 if (ctrlr->shadow_doorbell == NULL) { 1539 rc = -ENOMEM; 1540 goto error; 1541 } 1542 1543 len = ctrlr->page_size; 1544 prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len); 1545 if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1546 rc = -EFAULT; 1547 goto error; 1548 } 1549 1550 ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1551 NULL, SPDK_ENV_LCORE_ID_ANY, 1552 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1553 if (ctrlr->eventidx == NULL) { 1554 rc = -ENOMEM; 1555 goto error; 1556 } 1557 1558 len = ctrlr->page_size; 1559 prp2 = spdk_vtophys(ctrlr->eventidx, &len); 1560 if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1561 rc = -EFAULT; 1562 goto error; 1563 } 1564 1565 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, 1566 ctrlr->opts.admin_timeout_ms); 1567 1568 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 1569 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); 1570 if (rc != 0) { 1571 goto error; 1572 } 1573 1574 return 0; 1575 1576 error: 1577 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1578 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1579 return rc; 1580 } 1581 1582 void 1583 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) 1584 { 1585 struct nvme_request *req, *tmp; 1586 struct spdk_nvme_cpl cpl = {}; 1587 1588 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1589 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1590 1591 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 1592 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 1593 ctrlr->outstanding_aborts++; 1594 1595 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1596 nvme_free_request(req); 1597 } 1598 } 1599 1600 static int 1601 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1602 { 1603 if (ctrlr->is_resetting || ctrlr->is_removed) { 1604 /* 1605 * Controller is already resetting or has been removed. Return 1606 * immediately since there is no need to kick off another 1607 * reset in these cases. 1608 */ 1609 return ctrlr->is_resetting ? -EBUSY : -ENXIO; 1610 } 1611 1612 ctrlr->is_resetting = true; 1613 ctrlr->is_failed = false; 1614 ctrlr->is_disconnecting = true; 1615 ctrlr->prepare_for_reset = true; 1616 1617 NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n"); 1618 1619 /* Disable keep-alive, it'll be re-enabled as part of the init process */ 1620 ctrlr->keep_alive_interval_ticks = 0; 1621 1622 /* Abort all of the queued abort requests */ 1623 nvme_ctrlr_abort_queued_aborts(ctrlr); 1624 1625 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 1626 1627 ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1628 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1629 1630 return 0; 1631 } 1632 1633 static void 1634 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr) 1635 { 1636 assert(ctrlr->is_failed == false); 1637 ctrlr->is_disconnecting = false; 1638 1639 /* Doorbell buffer config is invalid during reset */ 1640 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1641 1642 /* I/O Command Set Specific Identify Controller data is invalidated during reset */ 1643 nvme_ctrlr_free_iocs_specific_data(ctrlr); 1644 1645 spdk_bit_array_free(&ctrlr->free_io_qids); 1646 } 1647 1648 int 1649 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1650 { 1651 int rc; 1652 1653 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1654 rc = nvme_ctrlr_disconnect(ctrlr); 1655 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1656 1657 return rc; 1658 } 1659 1660 void 1661 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) 1662 { 1663 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1664 1665 ctrlr->prepare_for_reset = false; 1666 1667 /* Set the state back to INIT to cause a full hardware reset. */ 1668 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1669 1670 /* Return without releasing ctrlr_lock. ctrlr_lock will be released when 1671 * spdk_nvme_ctrlr_reset_poll_async() returns 0. 1672 */ 1673 } 1674 1675 /** 1676 * This function will be called when the controller is being reinitialized. 1677 * Note: the ctrlr_lock must be held when calling this function. 1678 */ 1679 int 1680 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) 1681 { 1682 struct spdk_nvme_ns *ns, *tmp_ns; 1683 struct spdk_nvme_qpair *qpair; 1684 int rc = 0, rc_tmp = 0; 1685 bool async; 1686 1687 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1688 NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n"); 1689 rc = -1; 1690 } 1691 if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) { 1692 return -EAGAIN; 1693 } 1694 1695 /* 1696 * For non-fabrics controllers, the memory locations of the transport qpair 1697 * don't change when the controller is reset. They simply need to be 1698 * re-enabled with admin commands to the controller. For fabric 1699 * controllers we need to disconnect and reconnect the qpair on its 1700 * own thread outside of the context of the reset. 1701 */ 1702 if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 1703 /* Reinitialize qpairs */ 1704 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1705 assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id)); 1706 spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id); 1707 1708 /* Force a synchronous connect. We can't currently handle an asynchronous 1709 * operation here. */ 1710 async = qpair->async; 1711 qpair->async = false; 1712 rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 1713 qpair->async = async; 1714 1715 if (rc_tmp != 0) { 1716 rc = rc_tmp; 1717 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1718 continue; 1719 } 1720 } 1721 } 1722 1723 /* 1724 * Take this opportunity to remove inactive namespaces. During a reset namespace 1725 * handles can be invalidated. 1726 */ 1727 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 1728 if (!ns->active) { 1729 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 1730 spdk_free(ns); 1731 } 1732 } 1733 1734 if (rc) { 1735 nvme_ctrlr_fail(ctrlr, false); 1736 } 1737 ctrlr->is_resetting = false; 1738 1739 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1740 1741 if (!ctrlr->cdata.oaes.ns_attribute_notices) { 1742 /* 1743 * If controller doesn't support ns_attribute_notices and 1744 * namespace attributes change (e.g. number of namespaces) 1745 * we need to update system handling device reset. 1746 */ 1747 nvme_io_msg_ctrlr_update(ctrlr); 1748 } 1749 1750 return rc; 1751 } 1752 1753 /* 1754 * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset 1755 * (Change CC.EN from 1 to 0) as a operation to disconnect the admin qpair. 1756 * The following two functions are added to do a Controller Level Reset. They have 1757 * to be called under the nvme controller's lock. 1758 */ 1759 void 1760 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) 1761 { 1762 assert(ctrlr->is_disconnecting == true); 1763 1764 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 1765 } 1766 1767 int 1768 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr) 1769 { 1770 int rc = 0; 1771 1772 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1773 NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n"); 1774 rc = -1; 1775 } 1776 1777 if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) { 1778 return -EAGAIN; 1779 } 1780 1781 return rc; 1782 } 1783 1784 static void 1785 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr) 1786 { 1787 struct spdk_nvme_qpair *qpair; 1788 1789 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1790 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1791 } 1792 } 1793 1794 int 1795 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1796 { 1797 int rc; 1798 1799 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1800 1801 rc = nvme_ctrlr_disconnect(ctrlr); 1802 if (rc == 0) { 1803 nvme_ctrlr_fail_io_qpairs(ctrlr); 1804 } 1805 1806 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1807 1808 if (rc != 0) { 1809 if (rc == -EBUSY) { 1810 rc = 0; 1811 } 1812 return rc; 1813 } 1814 1815 while (1) { 1816 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1817 if (rc == -ENXIO) { 1818 break; 1819 } 1820 } 1821 1822 spdk_nvme_ctrlr_reconnect_async(ctrlr); 1823 1824 while (true) { 1825 rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1826 if (rc != -EAGAIN) { 1827 break; 1828 } 1829 } 1830 1831 return rc; 1832 } 1833 1834 void 1835 spdk_nvme_ctrlr_prepare_for_reset(struct spdk_nvme_ctrlr *ctrlr) 1836 { 1837 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1838 ctrlr->prepare_for_reset = true; 1839 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1840 } 1841 1842 int 1843 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1844 { 1845 union spdk_nvme_cap_register cap; 1846 int rc = 0; 1847 1848 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1849 if (cap.bits.nssrs == 0) { 1850 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1851 return -ENOTSUP; 1852 } 1853 1854 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1855 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1856 ctrlr->is_resetting = true; 1857 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1858 ctrlr->is_resetting = false; 1859 1860 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1861 /* 1862 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1863 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 1864 */ 1865 return rc; 1866 } 1867 1868 int 1869 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1870 { 1871 int rc = 0; 1872 1873 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1874 1875 if (ctrlr->is_failed == false) { 1876 rc = -EPERM; 1877 goto out; 1878 } 1879 1880 if (trid->trtype != ctrlr->trid.trtype) { 1881 rc = -EINVAL; 1882 goto out; 1883 } 1884 1885 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1886 rc = -EINVAL; 1887 goto out; 1888 } 1889 1890 ctrlr->trid = *trid; 1891 1892 out: 1893 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1894 return rc; 1895 } 1896 1897 void 1898 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1899 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1900 { 1901 if (!spdk_process_is_primary()) { 1902 return; 1903 } 1904 1905 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1906 ctrlr->remove_cb = remove_cb; 1907 ctrlr->cb_ctx = remove_ctx; 1908 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1909 } 1910 1911 static void 1912 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1913 { 1914 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1915 1916 if (spdk_nvme_cpl_is_error(cpl)) { 1917 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1918 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1919 return; 1920 } 1921 1922 /* 1923 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1924 * controller supports. 1925 */ 1926 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1927 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1928 if (ctrlr->cdata.mdts > 0) { 1929 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1930 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1931 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1932 } 1933 1934 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1935 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1936 ctrlr->cntlid = ctrlr->cdata.cntlid; 1937 } else { 1938 /* 1939 * Fabrics controllers should already have CNTLID from the Connect command. 1940 * 1941 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1942 * trust the one from Connect. 1943 */ 1944 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1945 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1946 ctrlr->cdata.cntlid, ctrlr->cntlid); 1947 } 1948 } 1949 1950 if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1951 assert(ctrlr->cdata.sgls.supported != 0x3); 1952 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1953 if (ctrlr->cdata.sgls.supported == 0x2) { 1954 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1955 } 1956 1957 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1958 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1959 } 1960 1961 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1962 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1963 } 1964 1965 if (ctrlr->cdata.oacs.directives) { 1966 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 1967 } 1968 1969 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 1970 ctrlr->cdata.fuses.compare_and_write); 1971 if (ctrlr->cdata.fuses.compare_and_write) { 1972 ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 1973 } 1974 1975 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 1976 ctrlr->opts.admin_timeout_ms); 1977 } 1978 1979 static int 1980 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 1981 { 1982 int rc; 1983 1984 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 1985 ctrlr->opts.admin_timeout_ms); 1986 1987 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 1988 &ctrlr->cdata, sizeof(ctrlr->cdata), 1989 nvme_ctrlr_identify_done, ctrlr); 1990 if (rc != 0) { 1991 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1992 return rc; 1993 } 1994 1995 return 0; 1996 } 1997 1998 static void 1999 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2000 { 2001 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2002 struct spdk_nvme_ctrlr *ctrlr = arg; 2003 2004 if (spdk_nvme_cpl_is_error(cpl)) { 2005 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2006 spdk_free(ctrlr->tmp_ptr); 2007 ctrlr->tmp_ptr = NULL; 2008 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2009 return; 2010 } 2011 2012 log_page = ctrlr->tmp_ptr; 2013 2014 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2015 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2016 } 2017 spdk_free(ctrlr->tmp_ptr); 2018 ctrlr->tmp_ptr = NULL; 2019 2020 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2021 } 2022 2023 static int 2024 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2025 { 2026 int rc; 2027 2028 assert(!ctrlr->tmp_ptr); 2029 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2030 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2031 if (!ctrlr->tmp_ptr) { 2032 rc = -ENOMEM; 2033 goto error; 2034 } 2035 2036 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2037 ctrlr->opts.admin_timeout_ms); 2038 2039 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2040 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2041 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2042 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2043 if (rc != 0) { 2044 goto error; 2045 } 2046 2047 return 0; 2048 2049 error: 2050 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2051 spdk_free(ctrlr->tmp_ptr); 2052 ctrlr->tmp_ptr = NULL; 2053 return rc; 2054 } 2055 2056 static void 2057 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl) 2058 { 2059 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2060 2061 if (spdk_nvme_cpl_is_error(cpl)) { 2062 /* no need to print an error, the controller simply does not support ZNS */ 2063 nvme_ctrlr_free_zns_specific_data(ctrlr); 2064 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2065 ctrlr->opts.admin_timeout_ms); 2066 return; 2067 } 2068 2069 /* A zero zasl value means use mdts */ 2070 if (ctrlr->cdata_zns->zasl) { 2071 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl); 2072 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append); 2073 } else { 2074 ctrlr->max_zone_append_size = ctrlr->max_xfer_size; 2075 } 2076 2077 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG, 2078 ctrlr->opts.admin_timeout_ms); 2079 } 2080 2081 /** 2082 * This function will try to fetch the I/O Command Specific Controller data structure for 2083 * each I/O Command Set supported by SPDK. 2084 * 2085 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command" 2086 * will be returned. Since we are fetching in a exploratively way, getting an error back 2087 * from the controller should not be treated as fatal. 2088 * 2089 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set). 2090 * 2091 * I/O Command Sets without a IOCS specific data structure (i.e. a zero-filled IOCS specific 2092 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set). 2093 */ 2094 static int 2095 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2096 { 2097 int rc; 2098 2099 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2100 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2101 ctrlr->opts.admin_timeout_ms); 2102 return 0; 2103 } 2104 2105 /* 2106 * Since SPDK currently only needs to fetch a single Command Set, keep the code here, 2107 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates, 2108 * which would require additional functions and complexity for no good reason. 2109 */ 2110 assert(!ctrlr->cdata_zns); 2111 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2112 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2113 if (!ctrlr->cdata_zns) { 2114 rc = -ENOMEM; 2115 goto error; 2116 } 2117 2118 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2119 ctrlr->opts.admin_timeout_ms); 2120 2121 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2122 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2123 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2124 if (rc != 0) { 2125 goto error; 2126 } 2127 2128 return 0; 2129 2130 error: 2131 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2132 nvme_ctrlr_free_zns_specific_data(ctrlr); 2133 return rc; 2134 } 2135 2136 enum nvme_active_ns_state { 2137 NVME_ACTIVE_NS_STATE_IDLE, 2138 NVME_ACTIVE_NS_STATE_PROCESSING, 2139 NVME_ACTIVE_NS_STATE_DONE, 2140 NVME_ACTIVE_NS_STATE_ERROR 2141 }; 2142 2143 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2144 2145 struct nvme_active_ns_ctx { 2146 struct spdk_nvme_ctrlr *ctrlr; 2147 uint32_t page_count; 2148 uint32_t next_nsid; 2149 uint32_t *new_ns_list; 2150 nvme_active_ns_ctx_deleter deleter; 2151 2152 enum nvme_active_ns_state state; 2153 }; 2154 2155 static struct nvme_active_ns_ctx * 2156 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2157 { 2158 struct nvme_active_ns_ctx *ctx; 2159 uint32_t *new_ns_list = NULL; 2160 2161 ctx = calloc(1, sizeof(*ctx)); 2162 if (!ctx) { 2163 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2164 return NULL; 2165 } 2166 2167 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2168 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2169 if (!new_ns_list) { 2170 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2171 free(ctx); 2172 return NULL; 2173 } 2174 2175 ctx->page_count = 1; 2176 ctx->new_ns_list = new_ns_list; 2177 ctx->ctrlr = ctrlr; 2178 ctx->deleter = deleter; 2179 2180 return ctx; 2181 } 2182 2183 static void 2184 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2185 { 2186 spdk_free(ctx->new_ns_list); 2187 free(ctx); 2188 } 2189 2190 static int 2191 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2192 { 2193 struct spdk_nvme_ns tmp, *ns; 2194 2195 assert(ctrlr != NULL); 2196 2197 tmp.id = nsid; 2198 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2199 if (ns == NULL) { 2200 return -EINVAL; 2201 } 2202 2203 nvme_ns_destruct(ns); 2204 ns->active = false; 2205 2206 return 0; 2207 } 2208 2209 static int 2210 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2211 { 2212 struct spdk_nvme_ns *ns; 2213 2214 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2215 return -EINVAL; 2216 } 2217 2218 /* Namespaces are constructed on demand, so simply request it. */ 2219 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2220 if (ns == NULL) { 2221 return -ENOMEM; 2222 } 2223 2224 ns->active = true; 2225 2226 return 0; 2227 } 2228 2229 static void 2230 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2231 size_t max_entries) 2232 { 2233 uint32_t active_ns_count = 0; 2234 size_t i; 2235 uint32_t nsid; 2236 struct spdk_nvme_ns *ns, *tmp_ns; 2237 int rc; 2238 2239 /* First, remove namespaces that no longer exist */ 2240 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2241 nsid = new_ns_list[0]; 2242 active_ns_count = 0; 2243 while (nsid != 0) { 2244 if (nsid == ns->id) { 2245 break; 2246 } 2247 2248 nsid = new_ns_list[active_ns_count++]; 2249 } 2250 2251 if (nsid != ns->id) { 2252 /* Did not find this namespace id in the new list. */ 2253 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2254 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2255 } 2256 } 2257 2258 /* Next, add new namespaces */ 2259 active_ns_count = 0; 2260 for (i = 0; i < max_entries; i++) { 2261 nsid = new_ns_list[active_ns_count]; 2262 2263 if (nsid == 0) { 2264 break; 2265 } 2266 2267 /* If the namespace already exists, this will not construct it a second time. */ 2268 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2269 if (rc != 0) { 2270 /* We can't easily handle a failure here. But just move on. */ 2271 assert(false); 2272 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2273 continue; 2274 } 2275 2276 active_ns_count++; 2277 } 2278 2279 ctrlr->active_ns_count = active_ns_count; 2280 } 2281 2282 static void 2283 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2284 { 2285 struct nvme_active_ns_ctx *ctx = arg; 2286 uint32_t *new_ns_list = NULL; 2287 2288 if (spdk_nvme_cpl_is_error(cpl)) { 2289 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2290 goto out; 2291 } 2292 2293 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2294 if (ctx->next_nsid == 0) { 2295 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2296 goto out; 2297 } 2298 2299 ctx->page_count++; 2300 new_ns_list = spdk_realloc(ctx->new_ns_list, 2301 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2302 ctx->ctrlr->page_size); 2303 if (!new_ns_list) { 2304 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2305 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2306 goto out; 2307 } 2308 2309 ctx->new_ns_list = new_ns_list; 2310 nvme_ctrlr_identify_active_ns_async(ctx); 2311 return; 2312 2313 out: 2314 if (ctx->deleter) { 2315 ctx->deleter(ctx); 2316 } 2317 } 2318 2319 static void 2320 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2321 { 2322 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2323 uint32_t i; 2324 int rc; 2325 2326 if (ctrlr->cdata.nn == 0) { 2327 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2328 goto out; 2329 } 2330 2331 assert(ctx->new_ns_list != NULL); 2332 2333 /* 2334 * If controller doesn't support active ns list CNS 0x02 dummy up 2335 * an active ns list, i.e. all namespaces report as active 2336 */ 2337 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2338 uint32_t *new_ns_list; 2339 2340 /* 2341 * Active NS list must always end with zero element. 2342 * So, we allocate for cdata.nn+1. 2343 */ 2344 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2345 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2346 new_ns_list = spdk_realloc(ctx->new_ns_list, 2347 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2348 ctx->ctrlr->page_size); 2349 if (!new_ns_list) { 2350 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2351 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2352 goto out; 2353 } 2354 2355 ctx->new_ns_list = new_ns_list; 2356 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2357 for (i = 0; i < ctrlr->cdata.nn; i++) { 2358 ctx->new_ns_list[i] = i + 1; 2359 } 2360 2361 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2362 goto out; 2363 } 2364 2365 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2366 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2367 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2368 nvme_ctrlr_identify_active_ns_async_done, ctx); 2369 if (rc != 0) { 2370 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2371 goto out; 2372 } 2373 2374 return; 2375 2376 out: 2377 if (ctx->deleter) { 2378 ctx->deleter(ctx); 2379 } 2380 } 2381 2382 static void 2383 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2384 { 2385 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2386 struct spdk_nvme_ns *ns; 2387 2388 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2389 nvme_active_ns_ctx_destroy(ctx); 2390 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2391 return; 2392 } 2393 2394 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2395 2396 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2397 nvme_ns_free_iocs_specific_data(ns); 2398 } 2399 2400 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2401 nvme_active_ns_ctx_destroy(ctx); 2402 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2403 } 2404 2405 static void 2406 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2407 { 2408 struct nvme_active_ns_ctx *ctx; 2409 2410 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2411 if (!ctx) { 2412 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2413 return; 2414 } 2415 2416 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2417 ctrlr->opts.admin_timeout_ms); 2418 nvme_ctrlr_identify_active_ns_async(ctx); 2419 } 2420 2421 int 2422 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2423 { 2424 struct nvme_active_ns_ctx *ctx; 2425 int rc; 2426 2427 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2428 if (!ctx) { 2429 return -ENOMEM; 2430 } 2431 2432 nvme_ctrlr_identify_active_ns_async(ctx); 2433 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2434 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2435 if (rc < 0) { 2436 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2437 break; 2438 } 2439 } 2440 2441 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2442 nvme_active_ns_ctx_destroy(ctx); 2443 return -ENXIO; 2444 } 2445 2446 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2447 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2448 nvme_active_ns_ctx_destroy(ctx); 2449 2450 return 0; 2451 } 2452 2453 static void 2454 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2455 { 2456 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2457 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2458 uint32_t nsid; 2459 int rc; 2460 2461 if (spdk_nvme_cpl_is_error(cpl)) { 2462 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2463 return; 2464 } 2465 2466 nvme_ns_set_identify_data(ns); 2467 2468 /* move on to the next active NS */ 2469 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2470 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2471 if (ns == NULL) { 2472 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2473 ctrlr->opts.admin_timeout_ms); 2474 return; 2475 } 2476 ns->ctrlr = ctrlr; 2477 ns->id = nsid; 2478 2479 rc = nvme_ctrlr_identify_ns_async(ns); 2480 if (rc) { 2481 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2482 } 2483 } 2484 2485 static int 2486 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2487 { 2488 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2489 struct spdk_nvme_ns_data *nsdata; 2490 2491 nsdata = &ns->nsdata; 2492 2493 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2494 ctrlr->opts.admin_timeout_ms); 2495 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2496 nsdata, sizeof(*nsdata), 2497 nvme_ctrlr_identify_ns_async_done, ns); 2498 } 2499 2500 static int 2501 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2502 { 2503 uint32_t nsid; 2504 struct spdk_nvme_ns *ns; 2505 int rc; 2506 2507 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2508 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2509 if (ns == NULL) { 2510 /* No active NS, move on to the next state */ 2511 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2512 ctrlr->opts.admin_timeout_ms); 2513 return 0; 2514 } 2515 2516 ns->ctrlr = ctrlr; 2517 ns->id = nsid; 2518 2519 rc = nvme_ctrlr_identify_ns_async(ns); 2520 if (rc) { 2521 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2522 } 2523 2524 return rc; 2525 } 2526 2527 static int 2528 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2529 { 2530 uint32_t nsid; 2531 struct spdk_nvme_ns *ns; 2532 int rc; 2533 2534 if (!prev_nsid) { 2535 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2536 } else { 2537 /* move on to the next active NS */ 2538 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2539 } 2540 2541 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2542 if (ns == NULL) { 2543 /* No first/next active NS, move on to the next state */ 2544 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2545 ctrlr->opts.admin_timeout_ms); 2546 return 0; 2547 } 2548 2549 /* loop until we find a ns which has (supported) iocs specific data */ 2550 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2551 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2552 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2553 if (ns == NULL) { 2554 /* no namespace with (supported) iocs specific data found */ 2555 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2556 ctrlr->opts.admin_timeout_ms); 2557 return 0; 2558 } 2559 } 2560 2561 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2562 if (rc) { 2563 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2564 } 2565 2566 return rc; 2567 } 2568 2569 static void 2570 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2571 { 2572 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2573 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2574 2575 if (spdk_nvme_cpl_is_error(cpl)) { 2576 nvme_ns_free_zns_specific_data(ns); 2577 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2578 return; 2579 } 2580 2581 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2582 } 2583 2584 static int 2585 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2586 { 2587 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2588 int rc; 2589 2590 switch (ns->csi) { 2591 case SPDK_NVME_CSI_ZNS: 2592 break; 2593 default: 2594 /* 2595 * This switch must handle all cases for which 2596 * nvme_ns_has_supported_iocs_specific_data() returns true, 2597 * other cases should never happen. 2598 */ 2599 assert(0); 2600 } 2601 2602 assert(!ns->nsdata_zns); 2603 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2604 SPDK_MALLOC_SHARE); 2605 if (!ns->nsdata_zns) { 2606 return -ENOMEM; 2607 } 2608 2609 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2610 ctrlr->opts.admin_timeout_ms); 2611 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2612 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2613 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2614 if (rc) { 2615 nvme_ns_free_zns_specific_data(ns); 2616 } 2617 2618 return rc; 2619 } 2620 2621 static int 2622 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2623 { 2624 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2625 /* Multi IOCS not supported/enabled, move on to the next state */ 2626 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2627 ctrlr->opts.admin_timeout_ms); 2628 return 0; 2629 } 2630 2631 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2632 } 2633 2634 static void 2635 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2636 { 2637 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2638 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2639 uint32_t nsid; 2640 int rc; 2641 2642 if (spdk_nvme_cpl_is_error(cpl)) { 2643 /* 2644 * Many controllers claim to be compatible with NVMe 1.3, however, 2645 * they do not implement NS ID Desc List. Therefore, instead of setting 2646 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2647 * error and move on to the next state. 2648 * 2649 * The proper way is to create a new quirk for controllers that violate 2650 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2651 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2652 * it is too generic and was added in order to handle controllers that 2653 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 2654 */ 2655 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2656 ctrlr->opts.admin_timeout_ms); 2657 return; 2658 } 2659 2660 nvme_ns_set_id_desc_list_data(ns); 2661 2662 /* move on to the next active NS */ 2663 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2664 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2665 if (ns == NULL) { 2666 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2667 ctrlr->opts.admin_timeout_ms); 2668 return; 2669 } 2670 2671 rc = nvme_ctrlr_identify_id_desc_async(ns); 2672 if (rc) { 2673 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2674 } 2675 } 2676 2677 static int 2678 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2679 { 2680 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2681 2682 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2683 2684 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2685 ctrlr->opts.admin_timeout_ms); 2686 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2687 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2688 nvme_ctrlr_identify_id_desc_async_done, ns); 2689 } 2690 2691 static int 2692 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2693 { 2694 uint32_t nsid; 2695 struct spdk_nvme_ns *ns; 2696 int rc; 2697 2698 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2699 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2700 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2701 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2702 /* NS ID Desc List not supported, move on to the next state */ 2703 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2704 ctrlr->opts.admin_timeout_ms); 2705 return 0; 2706 } 2707 2708 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2709 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2710 if (ns == NULL) { 2711 /* No active NS, move on to the next state */ 2712 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2713 ctrlr->opts.admin_timeout_ms); 2714 return 0; 2715 } 2716 2717 rc = nvme_ctrlr_identify_id_desc_async(ns); 2718 if (rc) { 2719 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2720 } 2721 2722 return rc; 2723 } 2724 2725 static void 2726 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2727 { 2728 if (spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 2729 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2730 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2731 ctrlr->cdata.nvmf_specific.ioccsz); 2732 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2733 assert(0); 2734 } 2735 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2736 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2737 } 2738 } 2739 2740 static void 2741 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2742 { 2743 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2744 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2745 2746 if (spdk_nvme_cpl_is_error(cpl)) { 2747 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2748 ctrlr->opts.num_io_queues = 0; 2749 } else { 2750 /* 2751 * Data in cdw0 is 0-based. 2752 * Lower 16-bits indicate number of submission queues allocated. 2753 * Upper 16-bits indicate number of completion queues allocated. 2754 */ 2755 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2756 cq_allocated = (cpl->cdw0 >> 16) + 1; 2757 2758 /* 2759 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2760 * submission and completion queues. 2761 */ 2762 min_allocated = spdk_min(sq_allocated, cq_allocated); 2763 2764 /* Set number of queues to be minimum of requested and actually allocated. */ 2765 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2766 } 2767 2768 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2769 if (ctrlr->free_io_qids == NULL) { 2770 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2771 return; 2772 } 2773 2774 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2775 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2776 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2777 } 2778 2779 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2780 ctrlr->opts.admin_timeout_ms); 2781 } 2782 2783 static int 2784 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2785 { 2786 int rc; 2787 2788 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2789 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2790 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2791 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2792 } else if (ctrlr->opts.num_io_queues < 1) { 2793 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2794 ctrlr->opts.num_io_queues = 1; 2795 } 2796 2797 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2798 ctrlr->opts.admin_timeout_ms); 2799 2800 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2801 nvme_ctrlr_set_num_queues_done, ctrlr); 2802 if (rc != 0) { 2803 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2804 return rc; 2805 } 2806 2807 return 0; 2808 } 2809 2810 static void 2811 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2812 { 2813 uint32_t keep_alive_interval_us; 2814 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2815 2816 if (spdk_nvme_cpl_is_error(cpl)) { 2817 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2818 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2819 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2820 } else { 2821 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2822 cpl->status.sc, cpl->status.sct); 2823 ctrlr->opts.keep_alive_timeout_ms = 0; 2824 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2825 return; 2826 } 2827 } else { 2828 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2829 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2830 cpl->cdw0); 2831 } 2832 2833 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2834 } 2835 2836 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2837 ctrlr->keep_alive_interval_ticks = 0; 2838 } else { 2839 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2840 2841 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2842 2843 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2844 UINT64_C(1000000); 2845 2846 /* Schedule the first Keep Alive to be sent as soon as possible. */ 2847 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2848 } 2849 2850 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2851 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2852 } else { 2853 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2854 ctrlr->opts.admin_timeout_ms); 2855 } 2856 } 2857 2858 static int 2859 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2860 { 2861 int rc; 2862 2863 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2864 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2865 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2866 } else { 2867 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2868 ctrlr->opts.admin_timeout_ms); 2869 } 2870 return 0; 2871 } 2872 2873 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2874 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2875 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2876 ctrlr->opts.keep_alive_timeout_ms = 0; 2877 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2878 ctrlr->opts.admin_timeout_ms); 2879 return 0; 2880 } 2881 2882 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2883 ctrlr->opts.admin_timeout_ms); 2884 2885 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2886 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2887 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2888 if (rc != 0) { 2889 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2890 ctrlr->opts.keep_alive_timeout_ms = 0; 2891 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2892 return rc; 2893 } 2894 2895 return 0; 2896 } 2897 2898 static void 2899 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2900 { 2901 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2902 2903 if (spdk_nvme_cpl_is_error(cpl)) { 2904 /* 2905 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2906 * is optional. 2907 */ 2908 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2909 cpl->status.sc, cpl->status.sct); 2910 } else { 2911 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2912 } 2913 2914 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2915 } 2916 2917 static int 2918 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2919 { 2920 uint8_t *host_id; 2921 uint32_t host_id_size; 2922 int rc; 2923 2924 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2925 /* 2926 * NVMe-oF sends the host ID during Connect and doesn't allow 2927 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 2928 */ 2929 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2930 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2931 return 0; 2932 } 2933 2934 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2935 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2936 host_id = ctrlr->opts.extended_host_id; 2937 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2938 } else { 2939 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2940 host_id = ctrlr->opts.host_id; 2941 host_id_size = sizeof(ctrlr->opts.host_id); 2942 } 2943 2944 /* If the user specified an all-zeroes host identifier, don't send the command. */ 2945 if (spdk_mem_all_zero(host_id, host_id_size)) { 2946 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2947 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms); 2948 return 0; 2949 } 2950 2951 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2952 2953 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2954 ctrlr->opts.admin_timeout_ms); 2955 2956 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2957 if (rc != 0) { 2958 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2959 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2960 return rc; 2961 } 2962 2963 return 0; 2964 } 2965 2966 void 2967 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2968 { 2969 uint32_t nsid; 2970 struct spdk_nvme_ns *ns; 2971 2972 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2973 nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 2974 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2975 nvme_ns_construct(ns, nsid, ctrlr); 2976 } 2977 } 2978 2979 static int 2980 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 2981 { 2982 struct nvme_completion_poll_status *status; 2983 int rc = -ENOMEM; 2984 char *buffer = NULL; 2985 uint32_t nsid; 2986 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 2987 2988 if (ctrlr->opts.disable_read_changed_ns_list_log_page) { 2989 return 0; 2990 } 2991 2992 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 2993 if (!buffer) { 2994 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 2995 "changed ns log.\n"); 2996 return rc; 2997 } 2998 2999 status = calloc(1, sizeof(*status)); 3000 if (!status) { 3001 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3002 goto free_buffer; 3003 } 3004 3005 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3006 SPDK_NVME_LOG_CHANGED_NS_LIST, 3007 SPDK_NVME_GLOBAL_NS_TAG, 3008 buffer, buf_size, 0, 3009 nvme_completion_poll_cb, status); 3010 3011 if (rc) { 3012 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3013 free(status); 3014 goto free_buffer; 3015 } 3016 3017 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3018 ctrlr->opts.admin_timeout_ms * 1000); 3019 if (!status->timed_out) { 3020 free(status); 3021 } 3022 3023 if (rc) { 3024 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3025 goto free_buffer; 3026 } 3027 3028 /* only check the case of overflow. */ 3029 nsid = from_le32(buffer); 3030 if (nsid == 0xffffffffu) { 3031 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3032 } 3033 3034 free_buffer: 3035 spdk_dma_free(buffer); 3036 return rc; 3037 } 3038 3039 void 3040 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3041 const struct spdk_nvme_cpl *cpl) 3042 { 3043 union spdk_nvme_async_event_completion event; 3044 struct spdk_nvme_ctrlr_process *active_proc; 3045 int rc; 3046 3047 event.raw = cpl->cdw0; 3048 3049 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3050 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3051 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3052 3053 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3054 if (rc) { 3055 return; 3056 } 3057 nvme_ctrlr_update_namespaces(ctrlr); 3058 nvme_io_msg_ctrlr_update(ctrlr); 3059 } 3060 3061 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3062 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3063 if (!ctrlr->opts.disable_read_ana_log_page) { 3064 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3065 if (rc) { 3066 return; 3067 } 3068 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3069 ctrlr); 3070 } 3071 } 3072 3073 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3074 if (active_proc && active_proc->aer_cb_fn) { 3075 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3076 } 3077 } 3078 3079 static void 3080 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3081 const struct spdk_nvme_cpl *cpl) 3082 { 3083 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3084 struct spdk_nvme_ctrlr_process *proc; 3085 3086 /* Add async event to each process objects event list */ 3087 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3088 /* Must be shared memory so other processes can access */ 3089 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3090 if (!nvme_event) { 3091 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3092 return; 3093 } 3094 nvme_event->cpl = *cpl; 3095 3096 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3097 } 3098 } 3099 3100 void 3101 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3102 { 3103 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3104 struct spdk_nvme_ctrlr_process *active_proc; 3105 3106 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3107 3108 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3109 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3110 spdk_nvme_ctrlr_aer_completion_list, link); 3111 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3112 spdk_free(nvme_event); 3113 3114 } 3115 } 3116 3117 static void 3118 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3119 { 3120 struct nvme_async_event_request *aer = arg; 3121 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3122 3123 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3124 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3125 /* 3126 * This is simulated when controller is being shut down, to 3127 * effectively abort outstanding asynchronous event requests 3128 * and make sure all memory is freed. Do not repost the 3129 * request in this case. 3130 */ 3131 return; 3132 } 3133 3134 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3135 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3136 /* 3137 * SPDK will only send as many AERs as the device says it supports, 3138 * so this status code indicates an out-of-spec device. Do not repost 3139 * the request in this case. 3140 */ 3141 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3142 "handling. Do not repost this AER.\n"); 3143 return; 3144 } 3145 3146 /* Add the events to the list */ 3147 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3148 3149 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3150 if (ctrlr->is_removed || ctrlr->is_destructed) { 3151 return; 3152 } 3153 3154 /* 3155 * Repost another asynchronous event request to replace the one 3156 * that just completed. 3157 */ 3158 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3159 /* 3160 * We can't do anything to recover from a failure here, 3161 * so just print a warning message and leave the AER unsubmitted. 3162 */ 3163 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3164 } 3165 } 3166 3167 static int 3168 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3169 struct nvme_async_event_request *aer) 3170 { 3171 struct nvme_request *req; 3172 3173 aer->ctrlr = ctrlr; 3174 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3175 aer->req = req; 3176 if (req == NULL) { 3177 return -1; 3178 } 3179 3180 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3181 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3182 } 3183 3184 static void 3185 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3186 { 3187 struct nvme_async_event_request *aer; 3188 int rc; 3189 uint32_t i; 3190 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3191 3192 if (spdk_nvme_cpl_is_error(cpl)) { 3193 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3194 ctrlr->num_aers = 0; 3195 } else { 3196 /* aerl is a zero-based value, so we need to add 1 here. */ 3197 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3198 } 3199 3200 for (i = 0; i < ctrlr->num_aers; i++) { 3201 aer = &ctrlr->aer[i]; 3202 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3203 if (rc) { 3204 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3205 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3206 return; 3207 } 3208 } 3209 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3210 } 3211 3212 static int 3213 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3214 { 3215 union spdk_nvme_feat_async_event_configuration config; 3216 int rc; 3217 3218 config.raw = 0; 3219 3220 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3221 config.bits.discovery_log_change_notice = 1; 3222 } else { 3223 config.bits.crit_warn.bits.available_spare = 1; 3224 config.bits.crit_warn.bits.temperature = 1; 3225 config.bits.crit_warn.bits.device_reliability = 1; 3226 config.bits.crit_warn.bits.read_only = 1; 3227 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3228 3229 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3230 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3231 config.bits.ns_attr_notice = 1; 3232 } 3233 if (ctrlr->cdata.oaes.fw_activation_notices) { 3234 config.bits.fw_activation_notice = 1; 3235 } 3236 if (ctrlr->cdata.oaes.ana_change_notices) { 3237 config.bits.ana_change_notice = 1; 3238 } 3239 } 3240 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3241 config.bits.telemetry_log_notice = 1; 3242 } 3243 } 3244 3245 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3246 ctrlr->opts.admin_timeout_ms); 3247 3248 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3249 nvme_ctrlr_configure_aer_done, 3250 ctrlr); 3251 if (rc != 0) { 3252 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3253 return rc; 3254 } 3255 3256 return 0; 3257 } 3258 3259 struct spdk_nvme_ctrlr_process * 3260 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3261 { 3262 struct spdk_nvme_ctrlr_process *active_proc; 3263 3264 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3265 if (active_proc->pid == pid) { 3266 return active_proc; 3267 } 3268 } 3269 3270 return NULL; 3271 } 3272 3273 struct spdk_nvme_ctrlr_process * 3274 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3275 { 3276 return nvme_ctrlr_get_process(ctrlr, getpid()); 3277 } 3278 3279 /** 3280 * This function will be called when a process is using the controller. 3281 * 1. For the primary process, it is called when constructing the controller. 3282 * 2. For the secondary process, it is called at probing the controller. 3283 * Note: will check whether the process is already added for the same process. 3284 */ 3285 int 3286 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3287 { 3288 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3289 pid_t pid = getpid(); 3290 3291 /* Check whether the process is already added or not */ 3292 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3293 return 0; 3294 } 3295 3296 /* Initialize the per process properties for this ctrlr */ 3297 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3298 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3299 if (ctrlr_proc == NULL) { 3300 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3301 3302 return -1; 3303 } 3304 3305 ctrlr_proc->is_primary = spdk_process_is_primary(); 3306 ctrlr_proc->pid = pid; 3307 STAILQ_INIT(&ctrlr_proc->active_reqs); 3308 ctrlr_proc->devhandle = devhandle; 3309 ctrlr_proc->ref = 0; 3310 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3311 STAILQ_INIT(&ctrlr_proc->async_events); 3312 3313 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3314 3315 return 0; 3316 } 3317 3318 /** 3319 * This function will be called when the process detaches the controller. 3320 * Note: the ctrlr_lock must be held when calling this function. 3321 */ 3322 static void 3323 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3324 struct spdk_nvme_ctrlr_process *proc) 3325 { 3326 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3327 3328 assert(STAILQ_EMPTY(&proc->active_reqs)); 3329 3330 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3331 spdk_nvme_ctrlr_free_io_qpair(qpair); 3332 } 3333 3334 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3335 3336 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3337 spdk_pci_device_detach(proc->devhandle); 3338 } 3339 3340 spdk_free(proc); 3341 } 3342 3343 /** 3344 * This function will be called when the process exited unexpectedly 3345 * in order to free any incomplete nvme request, allocated IO qpairs 3346 * and allocated memory. 3347 * Note: the ctrlr_lock must be held when calling this function. 3348 */ 3349 static void 3350 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3351 { 3352 struct nvme_request *req, *tmp_req; 3353 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3354 struct spdk_nvme_ctrlr_aer_completion_list *event; 3355 3356 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3357 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3358 3359 assert(req->pid == proc->pid); 3360 3361 nvme_free_request(req); 3362 } 3363 3364 /* Remove async event from each process objects event list */ 3365 while (!STAILQ_EMPTY(&proc->async_events)) { 3366 event = STAILQ_FIRST(&proc->async_events); 3367 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3368 spdk_free(event); 3369 } 3370 3371 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3372 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3373 3374 /* 3375 * The process may have been killed while some qpairs were in their 3376 * completion context. Clear that flag here to allow these IO 3377 * qpairs to be deleted. 3378 */ 3379 qpair->in_completion_context = 0; 3380 3381 qpair->no_deletion_notification_needed = 1; 3382 3383 spdk_nvme_ctrlr_free_io_qpair(qpair); 3384 } 3385 3386 spdk_free(proc); 3387 } 3388 3389 /** 3390 * This function will be called when destructing the controller. 3391 * 1. There is no more admin request on this controller. 3392 * 2. Clean up any left resource allocation when its associated process is gone. 3393 */ 3394 void 3395 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3396 { 3397 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3398 3399 /* Free all the processes' properties and make sure no pending admin IOs */ 3400 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3401 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3402 3403 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3404 3405 spdk_free(active_proc); 3406 } 3407 } 3408 3409 /** 3410 * This function will be called when any other process attaches or 3411 * detaches the controller in order to cleanup those unexpectedly 3412 * terminated processes. 3413 * Note: the ctrlr_lock must be held when calling this function. 3414 */ 3415 static int 3416 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3417 { 3418 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3419 int active_proc_count = 0; 3420 3421 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3422 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3423 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3424 3425 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3426 3427 nvme_ctrlr_cleanup_process(active_proc); 3428 } else { 3429 active_proc_count++; 3430 } 3431 } 3432 3433 return active_proc_count; 3434 } 3435 3436 void 3437 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3438 { 3439 struct spdk_nvme_ctrlr_process *active_proc; 3440 3441 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3442 3443 nvme_ctrlr_remove_inactive_proc(ctrlr); 3444 3445 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3446 if (active_proc) { 3447 active_proc->ref++; 3448 } 3449 3450 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3451 } 3452 3453 void 3454 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3455 { 3456 struct spdk_nvme_ctrlr_process *active_proc; 3457 int proc_count; 3458 3459 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3460 3461 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3462 3463 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3464 if (active_proc) { 3465 active_proc->ref--; 3466 assert(active_proc->ref >= 0); 3467 3468 /* 3469 * The last active process will be removed at the end of 3470 * the destruction of the controller. 3471 */ 3472 if (active_proc->ref == 0 && proc_count != 1) { 3473 nvme_ctrlr_remove_process(ctrlr, active_proc); 3474 } 3475 } 3476 3477 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3478 } 3479 3480 int 3481 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3482 { 3483 struct spdk_nvme_ctrlr_process *active_proc; 3484 int ref = 0; 3485 3486 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3487 3488 nvme_ctrlr_remove_inactive_proc(ctrlr); 3489 3490 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3491 ref += active_proc->ref; 3492 } 3493 3494 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3495 3496 return ref; 3497 } 3498 3499 /** 3500 * Get the PCI device handle which is only visible to its associated process. 3501 */ 3502 struct spdk_pci_device * 3503 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3504 { 3505 struct spdk_nvme_ctrlr_process *active_proc; 3506 struct spdk_pci_device *devhandle = NULL; 3507 3508 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3509 3510 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3511 if (active_proc) { 3512 devhandle = active_proc->devhandle; 3513 } 3514 3515 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3516 3517 return devhandle; 3518 } 3519 3520 static void 3521 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3522 { 3523 struct spdk_nvme_ctrlr *ctrlr = ctx; 3524 3525 if (spdk_nvme_cpl_is_error(cpl)) { 3526 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3527 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3528 return; 3529 } 3530 3531 assert(value <= UINT32_MAX); 3532 ctrlr->vs.raw = (uint32_t)value; 3533 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3534 } 3535 3536 static void 3537 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3538 { 3539 struct spdk_nvme_ctrlr *ctrlr = ctx; 3540 3541 if (spdk_nvme_cpl_is_error(cpl)) { 3542 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3543 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3544 return; 3545 } 3546 3547 ctrlr->cap.raw = value; 3548 nvme_ctrlr_init_cap(ctrlr); 3549 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3550 } 3551 3552 static void 3553 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3554 { 3555 struct spdk_nvme_ctrlr *ctrlr = ctx; 3556 enum nvme_ctrlr_state state; 3557 3558 if (spdk_nvme_cpl_is_error(cpl)) { 3559 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3560 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3561 return; 3562 } 3563 3564 assert(value <= UINT32_MAX); 3565 ctrlr->process_init_cc.raw = (uint32_t)value; 3566 3567 if (ctrlr->process_init_cc.bits.en) { 3568 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3569 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3570 } else { 3571 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3572 } 3573 3574 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3575 } 3576 3577 static void 3578 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3579 { 3580 struct spdk_nvme_ctrlr *ctrlr = ctx; 3581 3582 if (spdk_nvme_cpl_is_error(cpl)) { 3583 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3584 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3585 return; 3586 } 3587 3588 /* 3589 * Wait 2.5 seconds before accessing PCI registers. 3590 * Not using sleep() to avoid blocking other controller's initialization. 3591 */ 3592 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3593 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3594 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3595 } 3596 3597 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3598 nvme_ctrlr_get_ready_timeout(ctrlr)); 3599 } 3600 3601 static void 3602 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3603 { 3604 struct spdk_nvme_ctrlr *ctrlr = ctx; 3605 union spdk_nvme_cc_register cc; 3606 int rc; 3607 3608 if (spdk_nvme_cpl_is_error(cpl)) { 3609 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3610 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3611 return; 3612 } 3613 3614 assert(value <= UINT32_MAX); 3615 cc.raw = (uint32_t)value; 3616 cc.bits.en = 0; 3617 ctrlr->process_init_cc.raw = cc.raw; 3618 3619 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3620 nvme_ctrlr_get_ready_timeout(ctrlr)); 3621 3622 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3623 if (rc != 0) { 3624 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3625 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3626 } 3627 } 3628 3629 static void 3630 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3631 { 3632 struct spdk_nvme_ctrlr *ctrlr = ctx; 3633 union spdk_nvme_csts_register csts; 3634 3635 if (spdk_nvme_cpl_is_error(cpl)) { 3636 /* While a device is resetting, it may be unable to service MMIO reads 3637 * temporarily. Allow for this case. 3638 */ 3639 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3640 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3641 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3642 NVME_TIMEOUT_KEEP_EXISTING); 3643 } else { 3644 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3645 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3646 } 3647 3648 return; 3649 } 3650 3651 assert(value <= UINT32_MAX); 3652 csts.raw = (uint32_t)value; 3653 if (csts.bits.rdy == 1 || csts.bits.cfs == 1) { 3654 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3655 nvme_ctrlr_get_ready_timeout(ctrlr)); 3656 } else { 3657 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3658 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3659 NVME_TIMEOUT_KEEP_EXISTING); 3660 } 3661 } 3662 3663 static void 3664 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3665 { 3666 struct spdk_nvme_ctrlr *ctrlr = ctx; 3667 union spdk_nvme_csts_register csts; 3668 3669 if (spdk_nvme_cpl_is_error(cpl)) { 3670 /* While a device is resetting, it may be unable to service MMIO reads 3671 * temporarily. Allow for this case. 3672 */ 3673 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3674 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3675 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3676 NVME_TIMEOUT_KEEP_EXISTING); 3677 } else { 3678 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3679 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3680 } 3681 3682 return; 3683 } 3684 3685 assert(value <= UINT32_MAX); 3686 csts.raw = (uint32_t)value; 3687 if (csts.bits.rdy == 0) { 3688 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3689 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED, 3690 nvme_ctrlr_get_ready_timeout(ctrlr)); 3691 } else { 3692 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3693 NVME_TIMEOUT_KEEP_EXISTING); 3694 } 3695 } 3696 3697 static void 3698 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3699 const struct spdk_nvme_cpl *cpl) 3700 { 3701 struct spdk_nvme_ctrlr *ctrlr = ctx; 3702 union spdk_nvme_csts_register csts; 3703 3704 if (spdk_nvme_cpl_is_error(cpl)) { 3705 /* While a device is resetting, it may be unable to service MMIO reads 3706 * temporarily. Allow for this case. 3707 */ 3708 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3709 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3710 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3711 NVME_TIMEOUT_KEEP_EXISTING); 3712 } else { 3713 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3714 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3715 } 3716 3717 return; 3718 } 3719 3720 assert(value <= UINT32_MAX); 3721 csts.raw = value; 3722 if (csts.bits.rdy == 1) { 3723 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3724 /* 3725 * The controller has been enabled. 3726 * Perform the rest of initialization serially. 3727 */ 3728 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3729 ctrlr->opts.admin_timeout_ms); 3730 } else { 3731 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3732 NVME_TIMEOUT_KEEP_EXISTING); 3733 } 3734 } 3735 3736 /** 3737 * This function will be called repeatedly during initialization until the controller is ready. 3738 */ 3739 int 3740 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3741 { 3742 uint32_t ready_timeout_in_ms; 3743 uint64_t ticks; 3744 int rc = 0; 3745 3746 ticks = spdk_get_ticks(); 3747 3748 /* 3749 * May need to avoid accessing any register on the target controller 3750 * for a while. Return early without touching the FSM. 3751 * Check sleep_timeout_tsc > 0 for unit test. 3752 */ 3753 if ((ctrlr->sleep_timeout_tsc > 0) && 3754 (ticks <= ctrlr->sleep_timeout_tsc)) { 3755 return 0; 3756 } 3757 ctrlr->sleep_timeout_tsc = 0; 3758 3759 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3760 3761 /* 3762 * Check if the current initialization step is done or has timed out. 3763 */ 3764 switch (ctrlr->state) { 3765 case NVME_CTRLR_STATE_INIT_DELAY: 3766 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3767 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3768 /* 3769 * Controller may need some delay before it's enabled. 3770 * 3771 * This is a workaround for an issue where the PCIe-attached NVMe controller 3772 * is not ready after VFIO reset. We delay the initialization rather than the 3773 * enabling itself, because this is required only for the very first enabling 3774 * - directly after a VFIO reset. 3775 */ 3776 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3777 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3778 } 3779 break; 3780 3781 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3782 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3783 if (rc == 0) { 3784 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3785 NVME_TIMEOUT_INFINITE); 3786 } else { 3787 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3788 } 3789 break; 3790 3791 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3792 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3793 3794 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3795 case NVME_QPAIR_CONNECTING: 3796 break; 3797 case NVME_QPAIR_CONNECTED: 3798 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3799 /* Fall through */ 3800 case NVME_QPAIR_ENABLED: 3801 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3802 NVME_TIMEOUT_INFINITE); 3803 /* Abort any queued requests that were sent while the adminq was connecting 3804 * to avoid stalling the init process during a reset, as requests don't get 3805 * resubmitted while the controller is resetting and subsequent commands 3806 * would get queued too. 3807 */ 3808 nvme_qpair_abort_queued_reqs(ctrlr->adminq, 0); 3809 break; 3810 case NVME_QPAIR_DISCONNECTING: 3811 assert(ctrlr->adminq->async == true); 3812 break; 3813 case NVME_QPAIR_DISCONNECTED: 3814 /* fallthrough */ 3815 default: 3816 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3817 break; 3818 } 3819 3820 break; 3821 3822 case NVME_CTRLR_STATE_READ_VS: 3823 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3824 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3825 break; 3826 3827 case NVME_CTRLR_STATE_READ_CAP: 3828 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3829 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3830 break; 3831 3832 case NVME_CTRLR_STATE_CHECK_EN: 3833 /* Begin the hardware initialization by making sure the controller is disabled. */ 3834 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3835 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3836 break; 3837 3838 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3839 /* 3840 * Controller is currently enabled. We need to disable it to cause a reset. 3841 * 3842 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3843 * Wait for the ready bit to be 1 before disabling the controller. 3844 */ 3845 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3846 NVME_TIMEOUT_KEEP_EXISTING); 3847 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3848 break; 3849 3850 case NVME_CTRLR_STATE_SET_EN_0: 3851 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3852 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3853 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3854 break; 3855 3856 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3857 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3858 NVME_TIMEOUT_KEEP_EXISTING); 3859 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3860 break; 3861 3862 case NVME_CTRLR_STATE_DISABLED: 3863 if (ctrlr->is_disconnecting) { 3864 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n"); 3865 } else { 3866 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3867 3868 /* 3869 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3870 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3871 */ 3872 spdk_delay_us(100); 3873 } 3874 break; 3875 3876 case NVME_CTRLR_STATE_ENABLE: 3877 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3878 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3879 rc = nvme_ctrlr_enable(ctrlr); 3880 if (rc) { 3881 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d", rc); 3882 } 3883 return rc; 3884 3885 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3886 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3887 NVME_TIMEOUT_KEEP_EXISTING); 3888 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3889 ctrlr); 3890 break; 3891 3892 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3893 nvme_transport_qpair_reset(ctrlr->adminq); 3894 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3895 break; 3896 3897 case NVME_CTRLR_STATE_IDENTIFY: 3898 rc = nvme_ctrlr_identify(ctrlr); 3899 break; 3900 3901 case NVME_CTRLR_STATE_CONFIGURE_AER: 3902 rc = nvme_ctrlr_configure_aer(ctrlr); 3903 break; 3904 3905 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3906 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3907 break; 3908 3909 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3910 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3911 break; 3912 3913 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3914 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3915 break; 3916 3917 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3918 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3919 rc = nvme_ctrlr_set_num_queues(ctrlr); 3920 break; 3921 3922 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3923 _nvme_ctrlr_identify_active_ns(ctrlr); 3924 break; 3925 3926 case NVME_CTRLR_STATE_IDENTIFY_NS: 3927 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3928 break; 3929 3930 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3931 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3932 break; 3933 3934 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3935 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3936 break; 3937 3938 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3939 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3940 break; 3941 3942 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3943 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3944 break; 3945 3946 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3947 nvme_ctrlr_set_supported_features(ctrlr); 3948 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3949 ctrlr->opts.admin_timeout_ms); 3950 break; 3951 3952 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3953 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3954 break; 3955 3956 case NVME_CTRLR_STATE_SET_HOST_ID: 3957 rc = nvme_ctrlr_set_host_id(ctrlr); 3958 break; 3959 3960 case NVME_CTRLR_STATE_TRANSPORT_READY: 3961 rc = nvme_transport_ctrlr_ready(ctrlr); 3962 if (rc) { 3963 NVME_CTRLR_ERRLOG(ctrlr, "Transport controller ready step failed: rc %d\n", rc); 3964 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3965 } else { 3966 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 3967 } 3968 break; 3969 3970 case NVME_CTRLR_STATE_READY: 3971 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3972 return 0; 3973 3974 case NVME_CTRLR_STATE_ERROR: 3975 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3976 return -1; 3977 3978 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 3979 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 3980 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 3981 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 3982 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3983 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 3984 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 3985 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3986 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 3987 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 3988 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 3989 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 3990 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 3991 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 3992 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 3993 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 3994 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 3995 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 3996 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 3997 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 3998 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 3999 /* 4000 * nvme_ctrlr_process_init() may be called from the completion context 4001 * for the admin qpair. Avoid recursive calls for this case. 4002 */ 4003 if (!ctrlr->adminq->in_completion_context) { 4004 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4005 } 4006 break; 4007 4008 default: 4009 assert(0); 4010 return -1; 4011 } 4012 4013 if (rc) { 4014 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d (%s)\n", 4015 rc, ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4016 } 4017 4018 /* Note: we use the ticks captured when we entered this function. 4019 * This covers environments where the SPDK process gets swapped out after 4020 * we tried to advance the state but before we check the timeout here. 4021 * It is not normal for this to happen, but harmless to handle it in this 4022 * way. 4023 */ 4024 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4025 ticks > ctrlr->state_timeout_tsc) { 4026 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4027 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4028 return -1; 4029 } 4030 4031 return rc; 4032 } 4033 4034 int 4035 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4036 { 4037 pthread_mutexattr_t attr; 4038 int rc = 0; 4039 4040 if (pthread_mutexattr_init(&attr)) { 4041 return -1; 4042 } 4043 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4044 #ifndef __FreeBSD__ 4045 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4046 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4047 #endif 4048 pthread_mutex_init(mtx, &attr)) { 4049 rc = -1; 4050 } 4051 pthread_mutexattr_destroy(&attr); 4052 return rc; 4053 } 4054 4055 int 4056 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4057 { 4058 int rc; 4059 4060 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4061 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4062 } else { 4063 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4064 } 4065 4066 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4067 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4068 ctrlr->opts.admin_queue_size); 4069 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4070 } 4071 4072 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_ADMIN_QUEUE_SIZE && 4073 (ctrlr->opts.admin_queue_size % SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE) != 0) { 4074 NVME_CTRLR_ERRLOG(ctrlr, 4075 "admin_queue_size %u is invalid for this NVMe device, adjust to next multiple\n", 4076 ctrlr->opts.admin_queue_size); 4077 ctrlr->opts.admin_queue_size = SPDK_ALIGN_CEIL(ctrlr->opts.admin_queue_size, 4078 SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE); 4079 } 4080 4081 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4082 NVME_CTRLR_ERRLOG(ctrlr, 4083 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4084 ctrlr->opts.admin_queue_size); 4085 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4086 } 4087 4088 ctrlr->flags = 0; 4089 ctrlr->free_io_qids = NULL; 4090 ctrlr->is_resetting = false; 4091 ctrlr->is_failed = false; 4092 ctrlr->is_destructed = false; 4093 4094 TAILQ_INIT(&ctrlr->active_io_qpairs); 4095 STAILQ_INIT(&ctrlr->queued_aborts); 4096 ctrlr->outstanding_aborts = 0; 4097 4098 ctrlr->ana_log_page = NULL; 4099 ctrlr->ana_log_page_size = 0; 4100 4101 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4102 if (rc != 0) { 4103 return rc; 4104 } 4105 4106 TAILQ_INIT(&ctrlr->active_procs); 4107 STAILQ_INIT(&ctrlr->register_operations); 4108 4109 RB_INIT(&ctrlr->ns); 4110 4111 return rc; 4112 } 4113 4114 static void 4115 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4116 { 4117 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4118 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4119 } 4120 4121 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4122 4123 /* For now, always select page_size == min_page_size. */ 4124 ctrlr->page_size = ctrlr->min_page_size; 4125 4126 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4127 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4128 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4129 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4130 /* If the user specifically set an IO queue size different than the 4131 * default, use that value. Otherwise overwrite with the quirked value. 4132 * This allows this quirk to be overridden when necessary. 4133 * However, cap.mqes still needs to be respected. 4134 */ 4135 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4136 } 4137 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4138 4139 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4140 } 4141 4142 void 4143 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4144 { 4145 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4146 } 4147 4148 void 4149 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4150 struct nvme_ctrlr_detach_ctx *ctx) 4151 { 4152 struct spdk_nvme_qpair *qpair, *tmp; 4153 4154 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4155 4156 ctrlr->prepare_for_reset = false; 4157 ctrlr->is_destructed = true; 4158 4159 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4160 4161 nvme_ctrlr_abort_queued_aborts(ctrlr); 4162 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4163 4164 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4165 spdk_nvme_ctrlr_free_io_qpair(qpair); 4166 } 4167 4168 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4169 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4170 4171 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4172 } 4173 4174 int 4175 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4176 struct nvme_ctrlr_detach_ctx *ctx) 4177 { 4178 struct spdk_nvme_ns *ns, *tmp_ns; 4179 int rc = 0; 4180 4181 if (!ctx->shutdown_complete) { 4182 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4183 if (rc == -EAGAIN) { 4184 return -EAGAIN; 4185 } 4186 /* Destruct ctrlr forcefully for any other error. */ 4187 } 4188 4189 if (ctx->cb_fn) { 4190 ctx->cb_fn(ctrlr); 4191 } 4192 4193 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4194 4195 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4196 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4197 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4198 spdk_free(ns); 4199 } 4200 4201 ctrlr->active_ns_count = 0; 4202 4203 spdk_bit_array_free(&ctrlr->free_io_qids); 4204 4205 free(ctrlr->ana_log_page); 4206 free(ctrlr->copied_ana_desc); 4207 ctrlr->ana_log_page = NULL; 4208 ctrlr->copied_ana_desc = NULL; 4209 ctrlr->ana_log_page_size = 0; 4210 4211 nvme_transport_ctrlr_destruct(ctrlr); 4212 4213 return rc; 4214 } 4215 4216 void 4217 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4218 { 4219 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4220 int rc; 4221 4222 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4223 4224 while (1) { 4225 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4226 if (rc != -EAGAIN) { 4227 break; 4228 } 4229 nvme_delay(1000); 4230 } 4231 } 4232 4233 int 4234 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4235 struct nvme_request *req) 4236 { 4237 return nvme_qpair_submit_request(ctrlr->adminq, req); 4238 } 4239 4240 static void 4241 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4242 { 4243 /* Do nothing */ 4244 } 4245 4246 /* 4247 * Check if we need to send a Keep Alive command. 4248 * Caller must hold ctrlr->ctrlr_lock. 4249 */ 4250 static int 4251 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4252 { 4253 uint64_t now; 4254 struct nvme_request *req; 4255 struct spdk_nvme_cmd *cmd; 4256 int rc = 0; 4257 4258 now = spdk_get_ticks(); 4259 if (now < ctrlr->next_keep_alive_tick) { 4260 return rc; 4261 } 4262 4263 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4264 if (req == NULL) { 4265 return rc; 4266 } 4267 4268 cmd = &req->cmd; 4269 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4270 4271 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4272 if (rc != 0) { 4273 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4274 rc = -ENXIO; 4275 } 4276 4277 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4278 return rc; 4279 } 4280 4281 int32_t 4282 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4283 { 4284 int32_t num_completions; 4285 int32_t rc; 4286 struct spdk_nvme_ctrlr_process *active_proc; 4287 4288 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4289 4290 if (ctrlr->keep_alive_interval_ticks) { 4291 rc = nvme_ctrlr_keep_alive(ctrlr); 4292 if (rc) { 4293 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4294 return rc; 4295 } 4296 } 4297 4298 rc = nvme_io_msg_process(ctrlr); 4299 if (rc < 0) { 4300 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4301 return rc; 4302 } 4303 num_completions = rc; 4304 4305 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4306 4307 /* Each process has an async list, complete the ones for this process object */ 4308 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4309 if (active_proc) { 4310 nvme_ctrlr_complete_queued_async_events(ctrlr); 4311 } 4312 4313 if (rc == -ENXIO && ctrlr->is_disconnecting) { 4314 nvme_ctrlr_disconnect_done(ctrlr); 4315 } 4316 4317 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4318 4319 if (rc < 0) { 4320 num_completions = rc; 4321 } else { 4322 num_completions += rc; 4323 } 4324 4325 return num_completions; 4326 } 4327 4328 const struct spdk_nvme_ctrlr_data * 4329 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4330 { 4331 return &ctrlr->cdata; 4332 } 4333 4334 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4335 { 4336 union spdk_nvme_csts_register csts; 4337 4338 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4339 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4340 } 4341 return csts; 4342 } 4343 4344 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4345 { 4346 union spdk_nvme_cc_register cc; 4347 4348 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4349 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4350 } 4351 return cc; 4352 } 4353 4354 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4355 { 4356 return ctrlr->cap; 4357 } 4358 4359 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4360 { 4361 return ctrlr->vs; 4362 } 4363 4364 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4365 { 4366 union spdk_nvme_cmbsz_register cmbsz; 4367 4368 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4369 cmbsz.raw = 0; 4370 } 4371 4372 return cmbsz; 4373 } 4374 4375 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4376 { 4377 union spdk_nvme_pmrcap_register pmrcap; 4378 4379 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4380 pmrcap.raw = 0; 4381 } 4382 4383 return pmrcap; 4384 } 4385 4386 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4387 { 4388 union spdk_nvme_bpinfo_register bpinfo; 4389 4390 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4391 bpinfo.raw = 0; 4392 } 4393 4394 return bpinfo; 4395 } 4396 4397 uint64_t 4398 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4399 { 4400 return ctrlr->pmr_size; 4401 } 4402 4403 uint32_t 4404 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4405 { 4406 return ctrlr->cdata.nn; 4407 } 4408 4409 bool 4410 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4411 { 4412 struct spdk_nvme_ns tmp, *ns; 4413 4414 tmp.id = nsid; 4415 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4416 4417 if (ns != NULL) { 4418 return ns->active; 4419 } 4420 4421 return false; 4422 } 4423 4424 uint32_t 4425 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4426 { 4427 struct spdk_nvme_ns *ns; 4428 4429 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4430 if (ns == NULL) { 4431 return 0; 4432 } 4433 4434 while (ns != NULL) { 4435 if (ns->active) { 4436 return ns->id; 4437 } 4438 4439 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4440 } 4441 4442 return 0; 4443 } 4444 4445 uint32_t 4446 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4447 { 4448 struct spdk_nvme_ns tmp, *ns; 4449 4450 tmp.id = prev_nsid; 4451 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4452 if (ns == NULL) { 4453 return 0; 4454 } 4455 4456 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4457 while (ns != NULL) { 4458 if (ns->active) { 4459 return ns->id; 4460 } 4461 4462 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4463 } 4464 4465 return 0; 4466 } 4467 4468 struct spdk_nvme_ns * 4469 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4470 { 4471 struct spdk_nvme_ns tmp; 4472 struct spdk_nvme_ns *ns; 4473 4474 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4475 return NULL; 4476 } 4477 4478 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4479 4480 tmp.id = nsid; 4481 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4482 4483 if (ns == NULL) { 4484 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4485 if (ns == NULL) { 4486 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4487 return NULL; 4488 } 4489 4490 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4491 ns->id = nsid; 4492 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4493 } 4494 4495 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4496 4497 return ns; 4498 } 4499 4500 struct spdk_pci_device * 4501 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4502 { 4503 if (ctrlr == NULL) { 4504 return NULL; 4505 } 4506 4507 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4508 return NULL; 4509 } 4510 4511 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4512 } 4513 4514 uint32_t 4515 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4516 { 4517 return ctrlr->max_xfer_size; 4518 } 4519 4520 void 4521 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4522 spdk_nvme_aer_cb aer_cb_fn, 4523 void *aer_cb_arg) 4524 { 4525 struct spdk_nvme_ctrlr_process *active_proc; 4526 4527 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4528 4529 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4530 if (active_proc) { 4531 active_proc->aer_cb_fn = aer_cb_fn; 4532 active_proc->aer_cb_arg = aer_cb_arg; 4533 } 4534 4535 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4536 } 4537 4538 void 4539 spdk_nvme_ctrlr_disable_read_changed_ns_list_log_page(struct spdk_nvme_ctrlr *ctrlr) 4540 { 4541 ctrlr->opts.disable_read_changed_ns_list_log_page = true; 4542 } 4543 4544 void 4545 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4546 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4547 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4548 { 4549 struct spdk_nvme_ctrlr_process *active_proc; 4550 4551 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4552 4553 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4554 if (active_proc) { 4555 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4556 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4557 active_proc->timeout_cb_fn = cb_fn; 4558 active_proc->timeout_cb_arg = cb_arg; 4559 } 4560 4561 ctrlr->timeout_enabled = true; 4562 4563 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4564 } 4565 4566 bool 4567 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4568 { 4569 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ 4570 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4571 return ctrlr->log_page_supported[log_page]; 4572 } 4573 4574 bool 4575 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4576 { 4577 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4578 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4579 return ctrlr->feature_supported[feature_code]; 4580 } 4581 4582 int 4583 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4584 struct spdk_nvme_ctrlr_list *payload) 4585 { 4586 struct nvme_completion_poll_status *status; 4587 struct spdk_nvme_ns *ns; 4588 int res; 4589 4590 if (nsid == 0) { 4591 return -EINVAL; 4592 } 4593 4594 status = calloc(1, sizeof(*status)); 4595 if (!status) { 4596 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4597 return -ENOMEM; 4598 } 4599 4600 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4601 nvme_completion_poll_cb, status); 4602 if (res) { 4603 free(status); 4604 return res; 4605 } 4606 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4607 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n"); 4608 if (!status->timed_out) { 4609 free(status); 4610 } 4611 return -ENXIO; 4612 } 4613 free(status); 4614 4615 res = nvme_ctrlr_identify_active_ns(ctrlr); 4616 if (res) { 4617 return res; 4618 } 4619 4620 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4621 return nvme_ns_construct(ns, nsid, ctrlr); 4622 } 4623 4624 int 4625 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4626 struct spdk_nvme_ctrlr_list *payload) 4627 { 4628 struct nvme_completion_poll_status *status; 4629 int res; 4630 4631 if (nsid == 0) { 4632 return -EINVAL; 4633 } 4634 4635 status = calloc(1, sizeof(*status)); 4636 if (!status) { 4637 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4638 return -ENOMEM; 4639 } 4640 4641 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4642 nvme_completion_poll_cb, status); 4643 if (res) { 4644 free(status); 4645 return res; 4646 } 4647 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4648 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4649 if (!status->timed_out) { 4650 free(status); 4651 } 4652 return -ENXIO; 4653 } 4654 free(status); 4655 4656 return nvme_ctrlr_identify_active_ns(ctrlr); 4657 } 4658 4659 uint32_t 4660 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4661 { 4662 struct nvme_completion_poll_status *status; 4663 int res; 4664 uint32_t nsid; 4665 4666 status = calloc(1, sizeof(*status)); 4667 if (!status) { 4668 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4669 return 0; 4670 } 4671 4672 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4673 if (res) { 4674 free(status); 4675 return 0; 4676 } 4677 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4678 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4679 if (!status->timed_out) { 4680 free(status); 4681 } 4682 return 0; 4683 } 4684 4685 nsid = status->cpl.cdw0; 4686 free(status); 4687 4688 assert(nsid > 0); 4689 4690 /* Return the namespace ID that was created */ 4691 return nsid; 4692 } 4693 4694 int 4695 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4696 { 4697 struct nvme_completion_poll_status *status; 4698 int res; 4699 4700 if (nsid == 0) { 4701 return -EINVAL; 4702 } 4703 4704 status = calloc(1, sizeof(*status)); 4705 if (!status) { 4706 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4707 return -ENOMEM; 4708 } 4709 4710 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4711 if (res) { 4712 free(status); 4713 return res; 4714 } 4715 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4716 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4717 if (!status->timed_out) { 4718 free(status); 4719 } 4720 return -ENXIO; 4721 } 4722 free(status); 4723 4724 return nvme_ctrlr_identify_active_ns(ctrlr); 4725 } 4726 4727 int 4728 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4729 struct spdk_nvme_format *format) 4730 { 4731 struct nvme_completion_poll_status *status; 4732 int res; 4733 4734 status = calloc(1, sizeof(*status)); 4735 if (!status) { 4736 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4737 return -ENOMEM; 4738 } 4739 4740 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4741 status); 4742 if (res) { 4743 free(status); 4744 return res; 4745 } 4746 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4747 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4748 if (!status->timed_out) { 4749 free(status); 4750 } 4751 return -ENXIO; 4752 } 4753 free(status); 4754 4755 return spdk_nvme_ctrlr_reset(ctrlr); 4756 } 4757 4758 int 4759 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4760 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4761 { 4762 struct spdk_nvme_fw_commit fw_commit; 4763 struct nvme_completion_poll_status *status; 4764 int res; 4765 unsigned int size_remaining; 4766 unsigned int offset; 4767 unsigned int transfer; 4768 void *p; 4769 4770 if (!completion_status) { 4771 return -EINVAL; 4772 } 4773 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4774 if (size % 4) { 4775 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4776 return -1; 4777 } 4778 4779 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4780 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4781 */ 4782 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4783 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4784 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4785 return -1; 4786 } 4787 4788 status = calloc(1, sizeof(*status)); 4789 if (!status) { 4790 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4791 return -ENOMEM; 4792 } 4793 4794 /* Firmware download */ 4795 size_remaining = size; 4796 offset = 0; 4797 p = payload; 4798 4799 while (size_remaining > 0) { 4800 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4801 4802 memset(status, 0, sizeof(*status)); 4803 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4804 nvme_completion_poll_cb, 4805 status); 4806 if (res) { 4807 free(status); 4808 return res; 4809 } 4810 4811 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4812 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4813 if (!status->timed_out) { 4814 free(status); 4815 } 4816 return -ENXIO; 4817 } 4818 p += transfer; 4819 offset += transfer; 4820 size_remaining -= transfer; 4821 } 4822 4823 /* Firmware commit */ 4824 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4825 fw_commit.fs = slot; 4826 fw_commit.ca = commit_action; 4827 4828 memset(status, 0, sizeof(*status)); 4829 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4830 status); 4831 if (res) { 4832 free(status); 4833 return res; 4834 } 4835 4836 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4837 4838 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4839 4840 if (!status->timed_out) { 4841 free(status); 4842 } 4843 4844 if (res) { 4845 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4846 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4847 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 4848 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4849 NVME_CTRLR_NOTICELOG(ctrlr, 4850 "firmware activation requires conventional reset to be performed. !\n"); 4851 } else { 4852 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4853 } 4854 return -ENXIO; 4855 } 4856 } 4857 4858 return spdk_nvme_ctrlr_reset(ctrlr); 4859 } 4860 4861 int 4862 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4863 { 4864 int rc, size; 4865 union spdk_nvme_cmbsz_register cmbsz; 4866 4867 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4868 4869 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4870 return -ENOTSUP; 4871 } 4872 4873 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4874 4875 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4876 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4877 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4878 4879 if (rc < 0) { 4880 return rc; 4881 } 4882 4883 return size; 4884 } 4885 4886 void * 4887 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4888 { 4889 void *buf; 4890 4891 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4892 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4893 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4894 4895 return buf; 4896 } 4897 4898 void 4899 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4900 { 4901 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4902 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4903 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4904 } 4905 4906 int 4907 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4908 { 4909 int rc; 4910 4911 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4912 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4913 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4914 4915 return rc; 4916 } 4917 4918 int 4919 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4920 { 4921 int rc; 4922 4923 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4924 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4925 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4926 4927 return rc; 4928 } 4929 4930 void * 4931 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4932 { 4933 void *buf; 4934 4935 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4936 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4937 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4938 4939 return buf; 4940 } 4941 4942 int 4943 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4944 { 4945 int rc; 4946 4947 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4948 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4949 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4950 4951 return rc; 4952 } 4953 4954 int 4955 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4956 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4957 { 4958 union spdk_nvme_bprsel_register bprsel; 4959 union spdk_nvme_bpinfo_register bpinfo; 4960 uint64_t bpmbl, bpmb_size; 4961 4962 if (ctrlr->cap.bits.bps == 0) { 4963 return -ENOTSUP; 4964 } 4965 4966 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4967 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4968 return -EIO; 4969 } 4970 4971 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 4972 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 4973 return -EALREADY; 4974 } 4975 4976 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4977 4978 bpmb_size = bprsz * 4096; 4979 bpmbl = spdk_vtophys(payload, &bpmb_size); 4980 if (bpmbl == SPDK_VTOPHYS_ERROR) { 4981 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 4982 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4983 return -EFAULT; 4984 } 4985 4986 if (bpmb_size != bprsz * 4096) { 4987 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n"); 4988 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4989 return -EFAULT; 4990 } 4991 4992 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 4993 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 4994 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4995 return -EIO; 4996 } 4997 4998 bprsel.bits.bpid = bpid; 4999 bprsel.bits.bprof = bprof; 5000 bprsel.bits.bprsz = bprsz; 5001 5002 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 5003 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 5004 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5005 return -EIO; 5006 } 5007 5008 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5009 return 0; 5010 } 5011 5012 int 5013 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 5014 { 5015 int rc = 0; 5016 union spdk_nvme_bpinfo_register bpinfo; 5017 5018 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 5019 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 5020 return -EIO; 5021 } 5022 5023 switch (bpinfo.bits.brs) { 5024 case SPDK_NVME_BRS_NO_READ: 5025 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 5026 rc = -EINVAL; 5027 break; 5028 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5029 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5030 rc = -EAGAIN; 5031 break; 5032 case SPDK_NVME_BRS_READ_ERROR: 5033 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5034 rc = -EIO; 5035 break; 5036 case SPDK_NVME_BRS_READ_SUCCESS: 5037 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5038 break; 5039 default: 5040 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5041 rc = -EINVAL; 5042 } 5043 5044 return rc; 5045 } 5046 5047 static void 5048 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5049 { 5050 int res; 5051 struct spdk_nvme_ctrlr *ctrlr = arg; 5052 struct spdk_nvme_fw_commit fw_commit; 5053 struct spdk_nvme_cpl err_cpl = 5054 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5055 5056 if (spdk_nvme_cpl_is_error(cpl)) { 5057 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5058 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5059 return; 5060 } 5061 5062 if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) { 5063 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5064 ctrlr->fw_payload += ctrlr->fw_transfer_size; 5065 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5066 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5067 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5068 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5069 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5070 if (res) { 5071 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5072 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5073 return; 5074 } 5075 5076 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5077 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5078 } 5079 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5080 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5081 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5082 fw_commit.bpid = ctrlr->bpid; 5083 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5084 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5085 nvme_write_boot_partition_cb, ctrlr); 5086 if (res) { 5087 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5088 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5089 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5090 return; 5091 } 5092 5093 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5094 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5095 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5096 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5097 fw_commit.bpid = ctrlr->bpid; 5098 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5099 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5100 nvme_write_boot_partition_cb, ctrlr); 5101 if (res) { 5102 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5103 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5104 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5105 return; 5106 } 5107 5108 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5109 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5110 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5111 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5112 } else { 5113 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5114 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5115 return; 5116 } 5117 } 5118 5119 int 5120 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5121 void *payload, uint32_t size, uint32_t bpid, 5122 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5123 { 5124 int res; 5125 5126 if (ctrlr->cap.bits.bps == 0) { 5127 return -ENOTSUP; 5128 } 5129 5130 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5131 ctrlr->bpid = bpid; 5132 ctrlr->bp_write_cb_fn = cb_fn; 5133 ctrlr->bp_write_cb_arg = cb_arg; 5134 ctrlr->fw_offset = 0; 5135 ctrlr->fw_size_remaining = size; 5136 ctrlr->fw_payload = payload; 5137 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5138 5139 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5140 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5141 5142 return res; 5143 } 5144 5145 bool 5146 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5147 { 5148 assert(ctrlr); 5149 5150 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5151 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5152 } 5153 5154 bool 5155 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5156 { 5157 assert(ctrlr); 5158 5159 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5160 } 5161 5162 int 5163 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5164 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5165 { 5166 struct nvme_completion_poll_status *status; 5167 int res; 5168 5169 status = calloc(1, sizeof(*status)); 5170 if (!status) { 5171 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5172 return -ENOMEM; 5173 } 5174 5175 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5176 nvme_completion_poll_cb, status); 5177 if (res) { 5178 free(status); 5179 return res; 5180 } 5181 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5182 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5183 if (!status->timed_out) { 5184 free(status); 5185 } 5186 return -ENXIO; 5187 } 5188 free(status); 5189 5190 return 0; 5191 } 5192 5193 int 5194 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5195 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5196 { 5197 struct nvme_completion_poll_status *status; 5198 int res; 5199 5200 status = calloc(1, sizeof(*status)); 5201 if (!status) { 5202 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5203 return -ENOMEM; 5204 } 5205 5206 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5207 nvme_completion_poll_cb, 5208 status); 5209 if (res) { 5210 free(status); 5211 return res; 5212 } 5213 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5214 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5215 if (!status->timed_out) { 5216 free(status); 5217 } 5218 return -ENXIO; 5219 } 5220 5221 free(status); 5222 5223 return 0; 5224 } 5225 5226 uint64_t 5227 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5228 { 5229 return ctrlr->flags; 5230 } 5231 5232 const struct spdk_nvme_transport_id * 5233 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5234 { 5235 return &ctrlr->trid; 5236 } 5237 5238 int32_t 5239 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5240 { 5241 uint32_t qid; 5242 5243 assert(ctrlr->free_io_qids); 5244 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5245 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5246 if (qid > ctrlr->opts.num_io_queues) { 5247 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5248 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5249 return -1; 5250 } 5251 5252 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5253 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5254 return qid; 5255 } 5256 5257 void 5258 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5259 { 5260 assert(qid <= ctrlr->opts.num_io_queues); 5261 5262 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5263 5264 if (spdk_likely(ctrlr->free_io_qids)) { 5265 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5266 } 5267 5268 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5269 } 5270 5271 int 5272 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5273 struct spdk_memory_domain **domains, int array_size) 5274 { 5275 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5276 } 5277