1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. All rights reserved. 3 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "nvme_internal.h" 10 #include "nvme_io_msg.h" 11 12 #include "spdk/env.h" 13 #include "spdk/string.h" 14 #include "spdk/endian.h" 15 16 struct nvme_active_ns_ctx; 17 18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 19 struct nvme_async_event_request *aer); 20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx); 21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); 22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns); 23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); 24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr); 25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 26 uint64_t timeout_in_ms); 27 28 static int 29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2) 30 { 31 if (ns1->id < ns2->id) { 32 return -1; 33 } else if (ns1->id > ns2->id) { 34 return 1; 35 } else { 36 return 0; 37 } 38 } 39 40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp); 41 42 #define CTRLR_STRING(ctrlr) \ 43 ((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \ 44 ctrlr->trid.subnqn : ctrlr->trid.traddr) 45 46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \ 47 SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 48 49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \ 50 SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 51 52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \ 53 SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 54 55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) 
\ 56 SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 57 58 #ifdef DEBUG 59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \ 60 SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__); 61 #else 62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0) 63 #endif 64 65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \ 66 nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \ 67 offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg) 68 69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \ 70 nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \ 71 offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg) 72 73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \ 74 nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg) 75 76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \ 77 nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg) 78 79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \ 80 nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg) 81 82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \ 83 nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg) 84 85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \ 86 nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg) 87 88 static int 89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) 90 { 91 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), 92 &cc->raw); 93 } 94 95 static int 96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) 97 { 98 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), 99 &csts->raw); 100 } 101 102 int 103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) 104 { 105 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), 106 &cap->raw); 107 } 108 109 int 
110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) 111 { 112 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), 113 &vs->raw); 114 } 115 116 int 117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz) 118 { 119 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 120 &cmbsz->raw); 121 } 122 123 int 124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap) 125 { 126 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 127 &pmrcap->raw); 128 } 129 130 int 131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo) 132 { 133 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw), 134 &bpinfo->raw); 135 } 136 137 int 138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel) 139 { 140 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw), 141 bprsel->raw); 142 } 143 144 int 145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value) 146 { 147 return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl), 148 bpmbl_value); 149 } 150 151 static int 152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value) 153 { 154 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr), 155 nssr_value); 156 } 157 158 bool 159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr) 160 { 161 return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS && 162 ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS; 163 } 164 165 /* When the field in spdk_nvme_ctrlr_opts are changed and you change this function, please 166 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c 167 */ 168 void 169 
spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) 170 { 171 char host_id_str[SPDK_UUID_STRING_LEN]; 172 173 assert(opts); 174 175 opts->opts_size = opts_size; 176 177 #define FIELD_OK(field) \ 178 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size 179 180 #define SET_FIELD(field, value) \ 181 if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \ 182 opts->field = value; \ 183 } \ 184 185 SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES); 186 SET_FIELD(use_cmb_sqs, false); 187 SET_FIELD(no_shn_notification, false); 188 SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR); 189 SET_FIELD(arbitration_burst, 0); 190 SET_FIELD(low_priority_weight, 0); 191 SET_FIELD(medium_priority_weight, 0); 192 SET_FIELD(high_priority_weight, 0); 193 SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS); 194 SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT); 195 SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE); 196 197 if (nvme_driver_init() == 0) { 198 if (FIELD_OK(hostnqn)) { 199 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), 200 &g_spdk_nvme_driver->default_extended_host_id); 201 snprintf(opts->hostnqn, sizeof(opts->hostnqn), 202 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str); 203 } 204 205 if (FIELD_OK(extended_host_id)) { 206 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, 207 sizeof(opts->extended_host_id)); 208 } 209 210 } 211 212 SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS); 213 214 if (FIELD_OK(src_addr)) { 215 memset(opts->src_addr, 0, sizeof(opts->src_addr)); 216 } 217 218 if (FIELD_OK(src_svcid)) { 219 memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); 220 } 221 222 if (FIELD_OK(host_id)) { 223 memset(opts->host_id, 0, sizeof(opts->host_id)); 224 } 225 226 SET_FIELD(command_set, CHAR_BIT); 227 SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000); 228 SET_FIELD(header_digest, false); 229 
SET_FIELD(data_digest, false); 230 SET_FIELD(disable_error_logging, false); 231 SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT); 232 SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE); 233 SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT); 234 SET_FIELD(disable_read_ana_log_page, false); 235 236 #undef FIELD_OK 237 #undef SET_FIELD 238 } 239 240 const struct spdk_nvme_ctrlr_opts * 241 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) 242 { 243 return &ctrlr->opts; 244 } 245 246 /** 247 * This function will be called when the process allocates the IO qpair. 248 * Note: the ctrlr_lock must be held when calling this function. 249 */ 250 static void 251 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) 252 { 253 struct spdk_nvme_ctrlr_process *active_proc; 254 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 255 256 active_proc = nvme_ctrlr_get_current_process(ctrlr); 257 if (active_proc) { 258 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); 259 qpair->active_proc = active_proc; 260 } 261 } 262 263 /** 264 * This function will be called when the process frees the IO qpair. 265 * Note: the ctrlr_lock must be held when calling this function. 
266 */ 267 static void 268 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) 269 { 270 struct spdk_nvme_ctrlr_process *active_proc; 271 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 272 struct spdk_nvme_qpair *active_qpair, *tmp_qpair; 273 274 active_proc = nvme_ctrlr_get_current_process(ctrlr); 275 if (!active_proc) { 276 return; 277 } 278 279 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, 280 per_process_tailq, tmp_qpair) { 281 if (active_qpair == qpair) { 282 TAILQ_REMOVE(&active_proc->allocated_io_qpairs, 283 active_qpair, per_process_tailq); 284 285 break; 286 } 287 } 288 } 289 290 void 291 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, 292 struct spdk_nvme_io_qpair_opts *opts, 293 size_t opts_size) 294 { 295 assert(ctrlr); 296 297 assert(opts); 298 299 memset(opts, 0, opts_size); 300 301 #define FIELD_OK(field) \ 302 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size 303 304 if (FIELD_OK(qprio)) { 305 opts->qprio = SPDK_NVME_QPRIO_URGENT; 306 } 307 308 if (FIELD_OK(io_queue_size)) { 309 opts->io_queue_size = ctrlr->opts.io_queue_size; 310 } 311 312 if (FIELD_OK(io_queue_requests)) { 313 opts->io_queue_requests = ctrlr->opts.io_queue_requests; 314 } 315 316 if (FIELD_OK(delay_cmd_submit)) { 317 opts->delay_cmd_submit = false; 318 } 319 320 if (FIELD_OK(sq.vaddr)) { 321 opts->sq.vaddr = NULL; 322 } 323 324 if (FIELD_OK(sq.paddr)) { 325 opts->sq.paddr = 0; 326 } 327 328 if (FIELD_OK(sq.buffer_size)) { 329 opts->sq.buffer_size = 0; 330 } 331 332 if (FIELD_OK(cq.vaddr)) { 333 opts->cq.vaddr = NULL; 334 } 335 336 if (FIELD_OK(cq.paddr)) { 337 opts->cq.paddr = 0; 338 } 339 340 if (FIELD_OK(cq.buffer_size)) { 341 opts->cq.buffer_size = 0; 342 } 343 344 if (FIELD_OK(create_only)) { 345 opts->create_only = false; 346 } 347 348 if (FIELD_OK(async_mode)) { 349 opts->async_mode = false; 350 } 351 352 #undef FIELD_OK 353 } 354 355 static struct spdk_nvme_qpair * 356 
nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 357 const struct spdk_nvme_io_qpair_opts *opts) 358 { 359 int32_t qid; 360 struct spdk_nvme_qpair *qpair; 361 union spdk_nvme_cc_register cc; 362 363 if (!ctrlr) { 364 return NULL; 365 } 366 367 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 368 cc.raw = ctrlr->process_init_cc.raw; 369 370 if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) { 371 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 372 return NULL; 373 } 374 375 /* 376 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the 377 * default round robin arbitration method. 378 */ 379 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) { 380 NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n"); 381 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 382 return NULL; 383 } 384 385 qid = spdk_nvme_ctrlr_alloc_qid(ctrlr); 386 if (qid < 0) { 387 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 388 return NULL; 389 } 390 391 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts); 392 if (qpair == NULL) { 393 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n"); 394 spdk_nvme_ctrlr_free_qid(ctrlr, qid); 395 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 396 return NULL; 397 } 398 399 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); 400 401 nvme_ctrlr_proc_add_io_qpair(qpair); 402 403 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 404 405 return qpair; 406 } 407 408 int 409 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 410 { 411 int rc; 412 413 if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) { 414 return -EISCONN; 415 } 416 417 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 418 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 419 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 420 421 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { 422 spdk_delay_us(100); 423 } 424 425 return rc; 426 } 
427 428 void 429 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair) 430 { 431 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 432 433 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 434 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 435 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 436 } 437 438 struct spdk_nvme_qpair * 439 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, 440 const struct spdk_nvme_io_qpair_opts *user_opts, 441 size_t opts_size) 442 { 443 444 struct spdk_nvme_qpair *qpair; 445 struct spdk_nvme_io_qpair_opts opts; 446 int rc; 447 448 if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) { 449 /* When controller is resetting or initializing, free_io_qids is deleted or not created yet. 450 * We can't create IO qpair in that case */ 451 return NULL; 452 } 453 454 /* 455 * Get the default options, then overwrite them with the user-provided options 456 * up to opts_size. 457 * 458 * This allows for extensions of the opts structure without breaking 459 * ABI compatibility. 
460 */ 461 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 462 if (user_opts) { 463 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); 464 465 /* If user passes buffers, make sure they're big enough for the requested queue size */ 466 if (opts.sq.vaddr) { 467 if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) { 468 NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n", 469 opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))); 470 return NULL; 471 } 472 } 473 if (opts.cq.vaddr) { 474 if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) { 475 NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n", 476 opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))); 477 return NULL; 478 } 479 } 480 } 481 482 qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts); 483 484 if (qpair == NULL || opts.create_only == true) { 485 return qpair; 486 } 487 488 rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair); 489 if (rc != 0) { 490 NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n"); 491 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 492 nvme_ctrlr_proc_remove_io_qpair(qpair); 493 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 494 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); 495 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 496 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 497 return NULL; 498 } 499 500 return qpair; 501 } 502 503 int 504 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) 505 { 506 struct spdk_nvme_ctrlr *ctrlr; 507 enum nvme_qpair_state qpair_state; 508 int rc; 509 510 assert(qpair != NULL); 511 assert(nvme_qpair_is_admin_queue(qpair) == false); 512 assert(qpair->ctrlr != NULL); 513 514 ctrlr = qpair->ctrlr; 515 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 516 qpair_state = nvme_qpair_get_state(qpair); 517 518 if (ctrlr->is_removed) 
{ 519 rc = -ENODEV; 520 goto out; 521 } 522 523 if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) { 524 rc = -EAGAIN; 525 goto out; 526 } 527 528 if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) { 529 rc = -ENXIO; 530 goto out; 531 } 532 533 if (qpair_state != NVME_QPAIR_DISCONNECTED) { 534 rc = 0; 535 goto out; 536 } 537 538 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 539 if (rc) { 540 rc = -EAGAIN; 541 goto out; 542 } 543 544 out: 545 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 546 return rc; 547 } 548 549 spdk_nvme_qp_failure_reason 550 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr) 551 { 552 return ctrlr->adminq->transport_failure_reason; 553 } 554 555 /* 556 * This internal function will attempt to take the controller 557 * lock before calling disconnect on a controller qpair. 558 * Functions already holding the controller lock should 559 * call nvme_transport_ctrlr_disconnect_qpair directly. 560 */ 561 void 562 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) 563 { 564 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 565 566 assert(ctrlr != NULL); 567 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 568 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 569 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 570 } 571 572 int 573 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) 574 { 575 struct spdk_nvme_ctrlr *ctrlr; 576 577 if (qpair == NULL) { 578 return 0; 579 } 580 581 ctrlr = qpair->ctrlr; 582 583 if (qpair->in_completion_context) { 584 /* 585 * There are many cases where it is convenient to delete an io qpair in the context 586 * of that qpair's completion routine. To handle this properly, set a flag here 587 * so that the completion routine will perform an actual delete after the context 588 * unwinds. 
589 */ 590 qpair->delete_after_completion_context = 1; 591 return 0; 592 } 593 594 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 595 596 if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) { 597 spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair); 598 } 599 600 /* Do not retry. */ 601 nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING); 602 603 /* In the multi-process case, a process may call this function on a foreign 604 * I/O qpair (i.e. one that this process did not create) when that qpairs process 605 * exits unexpectedly. In that case, we must not try to abort any reqs associated 606 * with that qpair, since the callbacks will also be foreign to this process. 607 */ 608 if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 609 nvme_qpair_abort_all_queued_reqs(qpair, 0); 610 } 611 612 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 613 614 nvme_ctrlr_proc_remove_io_qpair(qpair); 615 616 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); 617 spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id); 618 619 nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair); 620 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 621 return 0; 622 } 623 624 static void 625 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, 626 struct spdk_nvme_intel_log_page_directory *log_page_directory) 627 { 628 if (log_page_directory == NULL) { 629 return; 630 } 631 632 assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL); 633 634 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; 635 636 if (log_page_directory->read_latency_log_len || 637 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { 638 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; 639 } 640 if (log_page_directory->write_latency_log_len || 641 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { 642 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; 643 } 644 if 
(log_page_directory->temperature_statistics_log_len) { 645 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; 646 } 647 if (log_page_directory->smart_log_len) { 648 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; 649 } 650 if (log_page_directory->marketing_description_log_len) { 651 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; 652 } 653 } 654 655 struct intel_log_pages_ctx { 656 struct spdk_nvme_intel_log_page_directory log_page_directory; 657 struct spdk_nvme_ctrlr *ctrlr; 658 }; 659 660 static void 661 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl) 662 { 663 struct intel_log_pages_ctx *ctx = arg; 664 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 665 666 if (!spdk_nvme_cpl_is_error(cpl)) { 667 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory); 668 } 669 670 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 671 ctrlr->opts.admin_timeout_ms); 672 free(ctx); 673 } 674 675 static int 676 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) 677 { 678 int rc = 0; 679 struct intel_log_pages_ctx *ctx; 680 681 ctx = calloc(1, sizeof(*ctx)); 682 if (!ctx) { 683 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 684 ctrlr->opts.admin_timeout_ms); 685 return 0; 686 } 687 688 ctx->ctrlr = ctrlr; 689 690 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, 691 SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory, 692 sizeof(struct spdk_nvme_intel_log_page_directory), 693 0, nvme_ctrlr_set_intel_support_log_pages_done, ctx); 694 if (rc != 0) { 695 free(ctx); 696 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 697 ctrlr->opts.admin_timeout_ms); 698 return 0; 699 } 700 701 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES, 702 ctrlr->opts.admin_timeout_ms); 703 704 return 0; 705 } 706 707 static int 708 
nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 709 { 710 uint32_t ana_log_page_size; 711 712 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid * 713 sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count * 714 sizeof(uint32_t); 715 716 /* Number of active namespaces may have changed. 717 * Check if ANA log page fits into existing buffer. 718 */ 719 if (ana_log_page_size > ctrlr->ana_log_page_size) { 720 void *new_buffer; 721 722 if (ctrlr->ana_log_page) { 723 new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size); 724 } else { 725 new_buffer = calloc(1, ana_log_page_size); 726 } 727 728 if (!new_buffer) { 729 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n", 730 ana_log_page_size); 731 return -ENXIO; 732 } 733 734 ctrlr->ana_log_page = new_buffer; 735 if (ctrlr->copied_ana_desc) { 736 new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size); 737 } else { 738 new_buffer = calloc(1, ana_log_page_size); 739 } 740 741 if (!new_buffer) { 742 NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n", 743 ana_log_page_size); 744 return -ENOMEM; 745 } 746 747 ctrlr->copied_ana_desc = new_buffer; 748 ctrlr->ana_log_page_size = ana_log_page_size; 749 } 750 751 return 0; 752 } 753 754 static int 755 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 756 { 757 struct nvme_completion_poll_status *status; 758 int rc; 759 760 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 761 if (rc != 0) { 762 return rc; 763 } 764 765 status = calloc(1, sizeof(*status)); 766 if (status == NULL) { 767 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 768 return -ENOMEM; 769 } 770 771 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 772 SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page, 773 ctrlr->ana_log_page_size, 0, 774 nvme_completion_poll_cb, status); 775 if (rc != 0) { 776 free(status); 777 return rc; 778 } 
779 780 if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock, 781 ctrlr->opts.admin_timeout_ms * 1000)) { 782 if (!status->timed_out) { 783 free(status); 784 } 785 return -EIO; 786 } 787 788 free(status); 789 return 0; 790 } 791 792 static int 793 nvme_ctrlr_init_ana_log_page(struct spdk_nvme_ctrlr *ctrlr) 794 { 795 int rc; 796 797 rc = nvme_ctrlr_alloc_ana_log_page(ctrlr); 798 if (rc) { 799 return rc; 800 } 801 802 return nvme_ctrlr_update_ana_log_page(ctrlr); 803 } 804 805 static int 806 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 807 void *cb_arg) 808 { 809 struct spdk_nvme_ctrlr *ctrlr = cb_arg; 810 struct spdk_nvme_ns *ns; 811 uint32_t i, nsid; 812 813 for (i = 0; i < desc->num_of_nsid; i++) { 814 nsid = desc->nsid[i]; 815 if (nsid == 0 || nsid > ctrlr->cdata.nn) { 816 continue; 817 } 818 819 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 820 assert(ns != NULL); 821 822 ns->ana_group_id = desc->ana_group_id; 823 ns->ana_state = desc->ana_state; 824 } 825 826 return 0; 827 } 828 829 int 830 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, 831 spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg) 832 { 833 struct spdk_nvme_ana_group_descriptor *copied_desc; 834 uint8_t *orig_desc; 835 uint32_t i, desc_size, copy_len; 836 int rc = 0; 837 838 if (ctrlr->ana_log_page == NULL) { 839 return -EINVAL; 840 } 841 842 copied_desc = ctrlr->copied_ana_desc; 843 844 orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page); 845 copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page); 846 847 for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) { 848 memcpy(copied_desc, orig_desc, copy_len); 849 850 rc = cb_fn(copied_desc, cb_arg); 851 if (rc != 0) { 852 break; 853 } 854 855 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) + 856 copied_desc->num_of_nsid * sizeof(uint32_t); 857 orig_desc += desc_size; 858 copy_len -= desc_size; 859 } 860 861 
return rc; 862 } 863 864 static int 865 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) 866 { 867 int rc = 0; 868 869 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); 870 /* Mandatory pages */ 871 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; 872 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; 873 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; 874 if (ctrlr->cdata.lpa.celp) { 875 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; 876 } 877 878 if (ctrlr->cdata.cmic.ana_reporting) { 879 ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true; 880 if (!ctrlr->opts.disable_read_ana_log_page) { 881 rc = nvme_ctrlr_init_ana_log_page(ctrlr); 882 if (rc == 0) { 883 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 884 ctrlr); 885 } 886 } 887 } 888 889 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && 890 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE && 891 !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { 892 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES, 893 ctrlr->opts.admin_timeout_ms); 894 895 } else { 896 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 897 ctrlr->opts.admin_timeout_ms); 898 899 } 900 901 return rc; 902 } 903 904 static void 905 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) 906 { 907 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; 908 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; 909 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; 910 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; 911 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; 912 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; 913 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; 914 } 915 916 static void 917 
nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr) 918 { 919 uint32_t cdw11; 920 struct nvme_completion_poll_status *status; 921 922 if (ctrlr->opts.arbitration_burst == 0) { 923 return; 924 } 925 926 if (ctrlr->opts.arbitration_burst > 7) { 927 NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values is from 0-7\n"); 928 return; 929 } 930 931 status = calloc(1, sizeof(*status)); 932 if (!status) { 933 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 934 return; 935 } 936 937 cdw11 = ctrlr->opts.arbitration_burst; 938 939 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) { 940 cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8; 941 cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16; 942 cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24; 943 } 944 945 if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION, 946 cdw11, 0, NULL, 0, 947 nvme_completion_poll_cb, status) < 0) { 948 NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n"); 949 free(status); 950 return; 951 } 952 953 if (nvme_wait_for_completion_timeout(ctrlr->adminq, status, 954 ctrlr->opts.admin_timeout_ms * 1000)) { 955 NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n"); 956 } 957 958 if (!status->timed_out) { 959 free(status); 960 } 961 } 962 963 static void 964 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) 965 { 966 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); 967 /* Mandatory features */ 968 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; 969 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; 970 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; 971 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; 972 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; 973 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; 974 
ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; 975 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; 976 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; 977 /* Optional features */ 978 if (ctrlr->cdata.vwc.present) { 979 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; 980 } 981 if (ctrlr->cdata.apsta.supported) { 982 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; 983 } 984 if (ctrlr->cdata.hmpre) { 985 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; 986 } 987 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { 988 nvme_ctrlr_set_intel_supported_features(ctrlr); 989 } 990 991 nvme_ctrlr_set_arbitration_feature(ctrlr); 992 } 993 994 bool 995 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr) 996 { 997 return ctrlr->is_failed; 998 } 999 1000 void 1001 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) 1002 { 1003 /* 1004 * Set the flag here and leave the work failure of qpairs to 1005 * spdk_nvme_qpair_process_completions(). 1006 */ 1007 if (hot_remove) { 1008 ctrlr->is_removed = true; 1009 } 1010 1011 if (ctrlr->is_failed) { 1012 NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n"); 1013 return; 1014 } 1015 1016 if (ctrlr->is_disconnecting) { 1017 NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n"); 1018 return; 1019 } 1020 1021 ctrlr->is_failed = true; 1022 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1023 NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n"); 1024 } 1025 1026 /** 1027 * This public API function will try to take the controller lock. 1028 * Any private functions being called from a thread already holding 1029 * the ctrlr lock should call nvme_ctrlr_fail directly. 
1030 */ 1031 void 1032 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr) 1033 { 1034 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1035 nvme_ctrlr_fail(ctrlr, false); 1036 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1037 } 1038 1039 static void 1040 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1041 { 1042 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1043 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1044 1045 if (spdk_nvme_cpl_is_error(cpl)) { 1046 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1047 ctx->shutdown_complete = true; 1048 return; 1049 } 1050 1051 if (ctrlr->opts.no_shn_notification) { 1052 ctx->shutdown_complete = true; 1053 return; 1054 } 1055 1056 /* 1057 * The NVMe specification defines RTD3E to be the time between 1058 * setting SHN = 1 until the controller will set SHST = 10b. 1059 * If the device doesn't report RTD3 entry latency, or if it 1060 * reports RTD3 entry latency less than 10 seconds, pick 1061 * 10 seconds as a reasonable amount of time to 1062 * wait before proceeding. 
1063 */ 1064 NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); 1065 ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000); 1066 ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000); 1067 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms); 1068 1069 ctx->shutdown_start_tsc = spdk_get_ticks(); 1070 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1071 } 1072 1073 static void 1074 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1075 { 1076 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1077 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1078 union spdk_nvme_cc_register cc; 1079 int rc; 1080 1081 if (spdk_nvme_cpl_is_error(cpl)) { 1082 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1083 ctx->shutdown_complete = true; 1084 return; 1085 } 1086 1087 assert(value <= UINT32_MAX); 1088 cc.raw = (uint32_t)value; 1089 1090 if (ctrlr->opts.no_shn_notification) { 1091 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 1092 if (cc.bits.en == 0) { 1093 ctx->shutdown_complete = true; 1094 return; 1095 } 1096 1097 cc.bits.en = 0; 1098 } else { 1099 cc.bits.shn = SPDK_NVME_SHN_NORMAL; 1100 } 1101 1102 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx); 1103 if (rc != 0) { 1104 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n"); 1105 ctx->shutdown_complete = true; 1106 } 1107 } 1108 1109 static void 1110 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr, 1111 struct nvme_ctrlr_detach_ctx *ctx) 1112 { 1113 int rc; 1114 1115 if (ctrlr->is_removed) { 1116 ctx->shutdown_complete = true; 1117 return; 1118 } 1119 1120 ctx->state = NVME_CTRLR_DETACH_SET_CC; 1121 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx); 1122 if (rc != 0) { 1123 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 1124 ctx->shutdown_complete = true; 1125 } 1126 } 1127 1128 static void 
1129 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1130 { 1131 struct nvme_ctrlr_detach_ctx *ctx = _ctx; 1132 1133 if (spdk_nvme_cpl_is_error(cpl)) { 1134 NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n"); 1135 ctx->shutdown_complete = true; 1136 return; 1137 } 1138 1139 assert(value <= UINT32_MAX); 1140 ctx->csts.raw = (uint32_t)value; 1141 ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE; 1142 } 1143 1144 static int 1145 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr, 1146 struct nvme_ctrlr_detach_ctx *ctx) 1147 { 1148 union spdk_nvme_csts_register csts; 1149 uint32_t ms_waited; 1150 1151 switch (ctx->state) { 1152 case NVME_CTRLR_DETACH_SET_CC: 1153 case NVME_CTRLR_DETACH_GET_CSTS: 1154 /* We're still waiting for the register operation to complete */ 1155 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1156 return -EAGAIN; 1157 1158 case NVME_CTRLR_DETACH_CHECK_CSTS: 1159 ctx->state = NVME_CTRLR_DETACH_GET_CSTS; 1160 if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) { 1161 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 1162 return -EIO; 1163 } 1164 return -EAGAIN; 1165 1166 case NVME_CTRLR_DETACH_GET_CSTS_DONE: 1167 ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS; 1168 break; 1169 1170 default: 1171 assert(0 && "Should never happen"); 1172 return -EINVAL; 1173 } 1174 1175 ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz(); 1176 csts.raw = ctx->csts.raw; 1177 1178 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { 1179 NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited); 1180 return 0; 1181 } 1182 1183 if (ms_waited < ctx->shutdown_timeout_ms) { 1184 return -EAGAIN; 1185 } 1186 1187 NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n", 1188 ctx->shutdown_timeout_ms); 1189 if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) { 1190 NVME_CTRLR_ERRLOG(ctrlr, "likely due to 
shutdown handling in the VMWare emulated NVMe SSD\n"); 1191 } 1192 1193 return 0; 1194 } 1195 1196 static inline uint64_t 1197 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr) 1198 { 1199 return ctrlr->cap.bits.to * 500; 1200 } 1201 1202 static void 1203 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 1204 { 1205 struct spdk_nvme_ctrlr *ctrlr = ctx; 1206 1207 if (spdk_nvme_cpl_is_error(cpl)) { 1208 NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n"); 1209 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1210 return; 1211 } 1212 1213 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 1214 nvme_ctrlr_get_ready_timeout(ctrlr)); 1215 } 1216 1217 static int 1218 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 1219 { 1220 union spdk_nvme_cc_register cc; 1221 int rc; 1222 1223 rc = nvme_transport_ctrlr_enable(ctrlr); 1224 if (rc != 0) { 1225 NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n"); 1226 return rc; 1227 } 1228 1229 cc.raw = ctrlr->process_init_cc.raw; 1230 if (cc.bits.en != 0) { 1231 NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n"); 1232 return -EINVAL; 1233 } 1234 1235 cc.bits.en = 1; 1236 cc.bits.css = 0; 1237 cc.bits.shn = 0; 1238 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 1239 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 1240 1241 /* Page size is 2 ^ (12 + mps). */ 1242 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; 1243 1244 /* 1245 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS. 1246 * A controller that does not have any bit set in CAP.CSS is not spec compliant. 1247 * Try to support such a controller regardless. 1248 */ 1249 if (ctrlr->cap.bits.css == 0) { 1250 NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. 
Assuming NVM is supported.\n"); 1251 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; 1252 } 1253 1254 /* 1255 * If the user did not explicitly request a command set, or supplied a value larger than 1256 * what can be saved in CC.CSS, use the most reasonable default. 1257 */ 1258 if (ctrlr->opts.command_set >= CHAR_BIT) { 1259 if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) { 1260 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS; 1261 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) { 1262 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1263 } else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) { 1264 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO; 1265 } else { 1266 /* Invalid supported bits detected, falling back to NVM. */ 1267 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1268 } 1269 } 1270 1271 /* Verify that the selected command set is supported by the controller. */ 1272 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { 1273 NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n", 1274 ctrlr->opts.command_set, ctrlr->cap.bits.css); 1275 NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. 
Assuming NVM is supported.\n"); 1276 ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM; 1277 } 1278 1279 cc.bits.css = ctrlr->opts.command_set; 1280 1281 switch (ctrlr->opts.arb_mechanism) { 1282 case SPDK_NVME_CC_AMS_RR: 1283 break; 1284 case SPDK_NVME_CC_AMS_WRR: 1285 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { 1286 break; 1287 } 1288 return -EINVAL; 1289 case SPDK_NVME_CC_AMS_VS: 1290 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { 1291 break; 1292 } 1293 return -EINVAL; 1294 default: 1295 return -EINVAL; 1296 } 1297 1298 cc.bits.ams = ctrlr->opts.arb_mechanism; 1299 ctrlr->process_init_cc.raw = cc.raw; 1300 1301 if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) { 1302 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 1303 return -EIO; 1304 } 1305 1306 return 0; 1307 } 1308 1309 static const char * 1310 nvme_ctrlr_state_string(enum nvme_ctrlr_state state) 1311 { 1312 switch (state) { 1313 case NVME_CTRLR_STATE_INIT_DELAY: 1314 return "delay init"; 1315 case NVME_CTRLR_STATE_CONNECT_ADMINQ: 1316 return "connect adminq"; 1317 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 1318 return "wait for connect adminq"; 1319 case NVME_CTRLR_STATE_READ_VS: 1320 return "read vs"; 1321 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 1322 return "read vs wait for vs"; 1323 case NVME_CTRLR_STATE_READ_CAP: 1324 return "read cap"; 1325 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 1326 return "read cap wait for cap"; 1327 case NVME_CTRLR_STATE_CHECK_EN: 1328 return "check en"; 1329 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 1330 return "check en wait for cc"; 1331 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 1332 return "disable and wait for CSTS.RDY = 1"; 1333 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1334 return "disable and wait for CSTS.RDY = 1 reg"; 1335 case NVME_CTRLR_STATE_SET_EN_0: 1336 return "set CC.EN = 0"; 1337 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 1338 return "set CC.EN = 0 wait for cc"; 1339 case 
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 1340 return "disable and wait for CSTS.RDY = 0"; 1341 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 1342 return "disable and wait for CSTS.RDY = 0 reg"; 1343 case NVME_CTRLR_STATE_DISABLED: 1344 return "controller is disabled"; 1345 case NVME_CTRLR_STATE_ENABLE: 1346 return "enable controller by writing CC.EN = 1"; 1347 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 1348 return "enable controller by writing CC.EN = 1 reg"; 1349 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 1350 return "wait for CSTS.RDY = 1"; 1351 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 1352 return "wait for CSTS.RDY = 1 reg"; 1353 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 1354 return "reset admin queue"; 1355 case NVME_CTRLR_STATE_IDENTIFY: 1356 return "identify controller"; 1357 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 1358 return "wait for identify controller"; 1359 case NVME_CTRLR_STATE_CONFIGURE_AER: 1360 return "configure AER"; 1361 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 1362 return "wait for configure aer"; 1363 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 1364 return "set keep alive timeout"; 1365 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 1366 return "wait for set keep alive timeout"; 1367 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 1368 return "identify controller iocs specific"; 1369 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 1370 return "wait for identify controller iocs specific"; 1371 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 1372 return "get zns cmd and effects log page"; 1373 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 1374 return "wait for get zns cmd and effects log page"; 1375 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 1376 return "set number of queues"; 1377 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 1378 return "wait for set number of queues"; 1379 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 1380 return "identify active ns"; 1381 case 
NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 1382 return "wait for identify active ns"; 1383 case NVME_CTRLR_STATE_IDENTIFY_NS: 1384 return "identify ns"; 1385 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 1386 return "wait for identify ns"; 1387 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 1388 return "identify namespace id descriptors"; 1389 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 1390 return "wait for identify namespace id descriptors"; 1391 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 1392 return "identify ns iocs specific"; 1393 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 1394 return "wait for identify ns iocs specific"; 1395 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 1396 return "set supported log pages"; 1397 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 1398 return "set supported INTEL log pages"; 1399 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 1400 return "wait for supported INTEL log pages"; 1401 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 1402 return "set supported features"; 1403 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 1404 return "set doorbell buffer config"; 1405 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 1406 return "wait for doorbell buffer config"; 1407 case NVME_CTRLR_STATE_SET_HOST_ID: 1408 return "set host ID"; 1409 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 1410 return "wait for set host ID"; 1411 case NVME_CTRLR_STATE_READY: 1412 return "ready"; 1413 case NVME_CTRLR_STATE_ERROR: 1414 return "error"; 1415 } 1416 return "unknown"; 1417 }; 1418 1419 static void 1420 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1421 uint64_t timeout_in_ms, bool quiet) 1422 { 1423 uint64_t ticks_per_ms, timeout_in_ticks, now_ticks; 1424 1425 ctrlr->state = state; 1426 if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) { 1427 if (!quiet) { 1428 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n", 1429 nvme_ctrlr_state_string(ctrlr->state)); 1430 } 1431 return; 
1432 } 1433 1434 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { 1435 goto inf; 1436 } 1437 1438 ticks_per_ms = spdk_get_ticks_hz() / 1000; 1439 if (timeout_in_ms > UINT64_MAX / ticks_per_ms) { 1440 NVME_CTRLR_ERRLOG(ctrlr, 1441 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1442 goto inf; 1443 } 1444 1445 now_ticks = spdk_get_ticks(); 1446 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 1447 if (timeout_in_ticks > UINT64_MAX - now_ticks) { 1448 NVME_CTRLR_ERRLOG(ctrlr, 1449 "Specified timeout would cause integer overflow. Defaulting to no timeout.\n"); 1450 goto inf; 1451 } 1452 1453 ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks; 1454 if (!quiet) { 1455 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n", 1456 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); 1457 } 1458 return; 1459 inf: 1460 if (!quiet) { 1461 NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n", 1462 nvme_ctrlr_state_string(ctrlr->state)); 1463 } 1464 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; 1465 } 1466 1467 static void 1468 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1469 uint64_t timeout_in_ms) 1470 { 1471 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false); 1472 } 1473 1474 static void 1475 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, 1476 uint64_t timeout_in_ms) 1477 { 1478 _nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true); 1479 } 1480 1481 static void 1482 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1483 { 1484 spdk_free(ctrlr->cdata_zns); 1485 ctrlr->cdata_zns = NULL; 1486 } 1487 1488 static void 1489 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr) 1490 { 1491 nvme_ctrlr_free_zns_specific_data(ctrlr); 1492 } 1493 1494 static void 1495 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) 1496 { 1497 if (ctrlr->shadow_doorbell) { 1498 
spdk_free(ctrlr->shadow_doorbell); 1499 ctrlr->shadow_doorbell = NULL; 1500 } 1501 1502 if (ctrlr->eventidx) { 1503 spdk_free(ctrlr->eventidx); 1504 ctrlr->eventidx = NULL; 1505 } 1506 } 1507 1508 static void 1509 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) 1510 { 1511 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1512 1513 if (spdk_nvme_cpl_is_error(cpl)) { 1514 NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n"); 1515 } else { 1516 NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n"); 1517 } 1518 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1519 ctrlr->opts.admin_timeout_ms); 1520 } 1521 1522 static int 1523 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) 1524 { 1525 int rc = 0; 1526 uint64_t prp1, prp2, len; 1527 1528 if (!ctrlr->cdata.oacs.doorbell_buffer_config) { 1529 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1530 ctrlr->opts.admin_timeout_ms); 1531 return 0; 1532 } 1533 1534 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 1535 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 1536 ctrlr->opts.admin_timeout_ms); 1537 return 0; 1538 } 1539 1540 /* only 1 page size for doorbell buffer */ 1541 ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1542 NULL, SPDK_ENV_LCORE_ID_ANY, 1543 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1544 if (ctrlr->shadow_doorbell == NULL) { 1545 rc = -ENOMEM; 1546 goto error; 1547 } 1548 1549 len = ctrlr->page_size; 1550 prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len); 1551 if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1552 rc = -EFAULT; 1553 goto error; 1554 } 1555 1556 ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size, 1557 NULL, SPDK_ENV_LCORE_ID_ANY, 1558 SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE); 1559 if (ctrlr->eventidx == NULL) { 1560 rc = -ENOMEM; 1561 goto error; 1562 } 1563 1564 len = ctrlr->page_size; 1565 prp2 = 
spdk_vtophys(ctrlr->eventidx, &len); 1566 if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) { 1567 rc = -EFAULT; 1568 goto error; 1569 } 1570 1571 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, 1572 ctrlr->opts.admin_timeout_ms); 1573 1574 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, 1575 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); 1576 if (rc != 0) { 1577 goto error; 1578 } 1579 1580 return 0; 1581 1582 error: 1583 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1584 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1585 return rc; 1586 } 1587 1588 void 1589 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) 1590 { 1591 struct nvme_request *req, *tmp; 1592 struct spdk_nvme_cpl cpl = {}; 1593 1594 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1595 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1596 1597 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { 1598 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); 1599 ctrlr->outstanding_aborts++; 1600 1601 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1602 nvme_free_request(req); 1603 } 1604 } 1605 1606 static int 1607 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1608 { 1609 if (ctrlr->is_resetting || ctrlr->is_removed) { 1610 /* 1611 * Controller is already resetting or has been removed. Return 1612 * immediately since there is no need to kick off another 1613 * reset in these cases. 1614 */ 1615 return ctrlr->is_resetting ? 
-EBUSY : -ENXIO; 1616 } 1617 1618 ctrlr->is_resetting = true; 1619 ctrlr->is_failed = false; 1620 ctrlr->is_disconnecting = true; 1621 ctrlr->prepare_for_reset = true; 1622 1623 NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n"); 1624 1625 /* Disable keep-alive, it'll be re-enabled as part of the init process */ 1626 ctrlr->keep_alive_interval_ticks = 0; 1627 1628 /* Abort all of the queued abort requests */ 1629 nvme_ctrlr_abort_queued_aborts(ctrlr); 1630 1631 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 1632 1633 ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1634 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 1635 1636 return 0; 1637 } 1638 1639 static void 1640 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr) 1641 { 1642 assert(ctrlr->is_failed == false); 1643 ctrlr->is_disconnecting = false; 1644 1645 /* Doorbell buffer config is invalid during reset */ 1646 nvme_ctrlr_free_doorbell_buffer(ctrlr); 1647 1648 /* I/O Command Set Specific Identify Controller data is invalidated during reset */ 1649 nvme_ctrlr_free_iocs_specific_data(ctrlr); 1650 1651 spdk_bit_array_free(&ctrlr->free_io_qids); 1652 } 1653 1654 int 1655 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) 1656 { 1657 int rc; 1658 1659 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1660 rc = nvme_ctrlr_disconnect(ctrlr); 1661 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1662 1663 return rc; 1664 } 1665 1666 void 1667 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) 1668 { 1669 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1670 1671 ctrlr->prepare_for_reset = false; 1672 1673 /* Set the state back to INIT to cause a full hardware reset. */ 1674 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 1675 1676 /* Return without releasing ctrlr_lock. ctrlr_lock will be released when 1677 * spdk_nvme_ctrlr_reset_poll_async() returns 0. 
1678 */ 1679 } 1680 1681 /** 1682 * This function will be called when the controller is being reinitialized. 1683 * Note: the ctrlr_lock must be held when calling this function. 1684 */ 1685 int 1686 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) 1687 { 1688 struct spdk_nvme_ns *ns, *tmp_ns; 1689 struct spdk_nvme_qpair *qpair; 1690 int rc = 0, rc_tmp = 0; 1691 bool async; 1692 1693 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1694 NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n"); 1695 rc = -1; 1696 } 1697 if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) { 1698 return -EAGAIN; 1699 } 1700 1701 /* 1702 * For non-fabrics controllers, the memory locations of the transport qpair 1703 * don't change when the controller is reset. They simply need to be 1704 * re-enabled with admin commands to the controller. For fabric 1705 * controllers we need to disconnect and reconnect the qpair on its 1706 * own thread outside of the context of the reset. 1707 */ 1708 if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) { 1709 /* Reinitialize qpairs */ 1710 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1711 assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id)); 1712 spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id); 1713 1714 /* Force a synchronous connect. We can't currently handle an asynchronous 1715 * operation here. */ 1716 async = qpair->async; 1717 qpair->async = false; 1718 rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); 1719 qpair->async = async; 1720 1721 if (rc_tmp != 0) { 1722 rc = rc_tmp; 1723 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1724 continue; 1725 } 1726 } 1727 } 1728 1729 /* 1730 * Take this opportunity to remove inactive namespaces. During a reset namespace 1731 * handles can be invalidated. 
1732 */ 1733 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 1734 if (!ns->active) { 1735 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 1736 spdk_free(ns); 1737 } 1738 } 1739 1740 if (rc) { 1741 nvme_ctrlr_fail(ctrlr, false); 1742 } 1743 ctrlr->is_resetting = false; 1744 1745 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1746 1747 if (!ctrlr->cdata.oaes.ns_attribute_notices) { 1748 /* 1749 * If controller doesn't support ns_attribute_notices and 1750 * namespace attributes change (e.g. number of namespaces) 1751 * we need to update system handling device reset. 1752 */ 1753 nvme_io_msg_ctrlr_update(ctrlr); 1754 } 1755 1756 return rc; 1757 } 1758 1759 /* 1760 * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset 1761 * (Change CC.EN from 1 to 0) as a operation to disconnect the admin qpair. 1762 * The following two functions are added to do a Controller Level Reset. They have 1763 * to be called under the nvme controller's lock. 1764 */ 1765 void 1766 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr) 1767 { 1768 assert(ctrlr->is_disconnecting == true); 1769 1770 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 1771 } 1772 1773 int 1774 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr) 1775 { 1776 int rc = 0; 1777 1778 if (nvme_ctrlr_process_init(ctrlr) != 0) { 1779 NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n"); 1780 rc = -1; 1781 } 1782 1783 if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) { 1784 return -EAGAIN; 1785 } 1786 1787 return rc; 1788 } 1789 1790 static void 1791 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr) 1792 { 1793 struct spdk_nvme_qpair *qpair; 1794 1795 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { 1796 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL; 1797 } 1798 } 1799 1800 int 1801 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1802 { 1803 int rc; 1804 1805 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1806 1807 rc = 
nvme_ctrlr_disconnect(ctrlr); 1808 if (rc == 0) { 1809 nvme_ctrlr_fail_io_qpairs(ctrlr); 1810 } 1811 1812 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1813 1814 if (rc != 0) { 1815 if (rc == -EBUSY) { 1816 rc = 0; 1817 } 1818 return rc; 1819 } 1820 1821 while (1) { 1822 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1823 if (rc == -ENXIO) { 1824 break; 1825 } 1826 } 1827 1828 spdk_nvme_ctrlr_reconnect_async(ctrlr); 1829 1830 while (true) { 1831 rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1832 if (rc != -EAGAIN) { 1833 break; 1834 } 1835 } 1836 1837 return rc; 1838 } 1839 1840 void 1841 spdk_nvme_ctrlr_prepare_for_reset(struct spdk_nvme_ctrlr *ctrlr) 1842 { 1843 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1844 ctrlr->prepare_for_reset = true; 1845 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1846 } 1847 1848 int 1849 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1850 { 1851 union spdk_nvme_cap_register cap; 1852 int rc = 0; 1853 1854 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1855 if (cap.bits.nssrs == 0) { 1856 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1857 return -ENOTSUP; 1858 } 1859 1860 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1861 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1862 ctrlr->is_resetting = true; 1863 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1864 ctrlr->is_resetting = false; 1865 1866 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1867 /* 1868 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1869 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 
1870 */ 1871 return rc; 1872 } 1873 1874 int 1875 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1876 { 1877 int rc = 0; 1878 1879 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1880 1881 if (ctrlr->is_failed == false) { 1882 rc = -EPERM; 1883 goto out; 1884 } 1885 1886 if (trid->trtype != ctrlr->trid.trtype) { 1887 rc = -EINVAL; 1888 goto out; 1889 } 1890 1891 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1892 rc = -EINVAL; 1893 goto out; 1894 } 1895 1896 ctrlr->trid = *trid; 1897 1898 out: 1899 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1900 return rc; 1901 } 1902 1903 void 1904 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1905 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1906 { 1907 if (!spdk_process_is_primary()) { 1908 return; 1909 } 1910 1911 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1912 ctrlr->remove_cb = remove_cb; 1913 ctrlr->cb_ctx = remove_ctx; 1914 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1915 } 1916 1917 static void 1918 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1919 { 1920 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1921 1922 if (spdk_nvme_cpl_is_error(cpl)) { 1923 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1924 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1925 return; 1926 } 1927 1928 /* 1929 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1930 * controller supports. 
1931 */ 1932 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1933 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1934 if (ctrlr->cdata.mdts > 0) { 1935 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1936 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1937 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1938 } 1939 1940 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1941 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1942 ctrlr->cntlid = ctrlr->cdata.cntlid; 1943 } else { 1944 /* 1945 * Fabrics controllers should already have CNTLID from the Connect command. 1946 * 1947 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1948 * trust the one from Connect. 1949 */ 1950 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1951 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1952 ctrlr->cdata.cntlid, ctrlr->cntlid); 1953 } 1954 } 1955 1956 if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) { 1957 assert(ctrlr->cdata.sgls.supported != 0x3); 1958 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1959 if (ctrlr->cdata.sgls.supported == 0x2) { 1960 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1961 } 1962 1963 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1964 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1965 } 1966 1967 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1968 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1969 } 1970 1971 if (ctrlr->cdata.oacs.directives) { 1972 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 1973 } 1974 1975 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 1976 ctrlr->cdata.fuses.compare_and_write); 1977 if (ctrlr->cdata.fuses.compare_and_write) { 1978 ctrlr->flags |= 
SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 1979 } 1980 1981 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 1982 ctrlr->opts.admin_timeout_ms); 1983 } 1984 1985 static int 1986 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 1987 { 1988 int rc; 1989 1990 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 1991 ctrlr->opts.admin_timeout_ms); 1992 1993 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 1994 &ctrlr->cdata, sizeof(ctrlr->cdata), 1995 nvme_ctrlr_identify_done, ctrlr); 1996 if (rc != 0) { 1997 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1998 return rc; 1999 } 2000 2001 return 0; 2002 } 2003 2004 static void 2005 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2006 { 2007 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2008 struct spdk_nvme_ctrlr *ctrlr = arg; 2009 2010 if (spdk_nvme_cpl_is_error(cpl)) { 2011 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2012 spdk_free(ctrlr->tmp_ptr); 2013 ctrlr->tmp_ptr = NULL; 2014 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2015 return; 2016 } 2017 2018 log_page = ctrlr->tmp_ptr; 2019 2020 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2021 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2022 } 2023 spdk_free(ctrlr->tmp_ptr); 2024 ctrlr->tmp_ptr = NULL; 2025 2026 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2027 } 2028 2029 static int 2030 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2031 { 2032 int rc; 2033 2034 assert(!ctrlr->tmp_ptr); 2035 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2036 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2037 if (!ctrlr->tmp_ptr) { 2038 rc = -ENOMEM; 2039 goto error; 2040 } 2041 2042 nvme_ctrlr_set_state(ctrlr, 
NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2043 ctrlr->opts.admin_timeout_ms); 2044 2045 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2046 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2047 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2048 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2049 if (rc != 0) { 2050 goto error; 2051 } 2052 2053 return 0; 2054 2055 error: 2056 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2057 spdk_free(ctrlr->tmp_ptr); 2058 ctrlr->tmp_ptr = NULL; 2059 return rc; 2060 } 2061 2062 static void 2063 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl) 2064 { 2065 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2066 2067 if (spdk_nvme_cpl_is_error(cpl)) { 2068 /* no need to print an error, the controller simply does not support ZNS */ 2069 nvme_ctrlr_free_zns_specific_data(ctrlr); 2070 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2071 ctrlr->opts.admin_timeout_ms); 2072 return; 2073 } 2074 2075 /* A zero zasl value means use mdts */ 2076 if (ctrlr->cdata_zns->zasl) { 2077 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl); 2078 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append); 2079 } else { 2080 ctrlr->max_zone_append_size = ctrlr->max_xfer_size; 2081 } 2082 2083 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG, 2084 ctrlr->opts.admin_timeout_ms); 2085 } 2086 2087 /** 2088 * This function will try to fetch the I/O Command Specific Controller data structure for 2089 * each I/O Command Set supported by SPDK. 2090 * 2091 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command" 2092 * will be returned. Since we are fetching in a exploratively way, getting an error back 2093 * from the controller should not be treated as fatal. 
2094 * 2095 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set). 2096 * 2097 * I/O Command Sets without a IOCS specific data structure (i.e. a zero-filled IOCS specific 2098 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set). 2099 */ 2100 static int 2101 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2102 { 2103 int rc; 2104 2105 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2106 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, 2107 ctrlr->opts.admin_timeout_ms); 2108 return 0; 2109 } 2110 2111 /* 2112 * Since SPDK currently only needs to fetch a single Command Set, keep the code here, 2113 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates, 2114 * which would require additional functions and complexity for no good reason. 2115 */ 2116 assert(!ctrlr->cdata_zns); 2117 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2118 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2119 if (!ctrlr->cdata_zns) { 2120 rc = -ENOMEM; 2121 goto error; 2122 } 2123 2124 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2125 ctrlr->opts.admin_timeout_ms); 2126 2127 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2128 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2129 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2130 if (rc != 0) { 2131 goto error; 2132 } 2133 2134 return 0; 2135 2136 error: 2137 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2138 nvme_ctrlr_free_zns_specific_data(ctrlr); 2139 return rc; 2140 } 2141 2142 enum nvme_active_ns_state { 2143 NVME_ACTIVE_NS_STATE_IDLE, 2144 NVME_ACTIVE_NS_STATE_PROCESSING, 2145 NVME_ACTIVE_NS_STATE_DONE, 2146 NVME_ACTIVE_NS_STATE_ERROR 2147 }; 2148 2149 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2150 2151 struct nvme_active_ns_ctx { 2152 struct spdk_nvme_ctrlr 
*ctrlr; 2153 uint32_t page_count; 2154 uint32_t next_nsid; 2155 uint32_t *new_ns_list; 2156 nvme_active_ns_ctx_deleter deleter; 2157 2158 enum nvme_active_ns_state state; 2159 }; 2160 2161 static struct nvme_active_ns_ctx * 2162 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2163 { 2164 struct nvme_active_ns_ctx *ctx; 2165 uint32_t *new_ns_list = NULL; 2166 2167 ctx = calloc(1, sizeof(*ctx)); 2168 if (!ctx) { 2169 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2170 return NULL; 2171 } 2172 2173 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2174 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2175 if (!new_ns_list) { 2176 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2177 free(ctx); 2178 return NULL; 2179 } 2180 2181 ctx->page_count = 1; 2182 ctx->new_ns_list = new_ns_list; 2183 ctx->ctrlr = ctrlr; 2184 ctx->deleter = deleter; 2185 2186 return ctx; 2187 } 2188 2189 static void 2190 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2191 { 2192 spdk_free(ctx->new_ns_list); 2193 free(ctx); 2194 } 2195 2196 static int 2197 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2198 { 2199 struct spdk_nvme_ns tmp, *ns; 2200 2201 assert(ctrlr != NULL); 2202 2203 tmp.id = nsid; 2204 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2205 if (ns == NULL) { 2206 return -EINVAL; 2207 } 2208 2209 nvme_ns_destruct(ns); 2210 ns->active = false; 2211 2212 return 0; 2213 } 2214 2215 static int 2216 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2217 { 2218 struct spdk_nvme_ns *ns; 2219 2220 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2221 return -EINVAL; 2222 } 2223 2224 /* Namespaces are constructed on demand, so simply request it. 
*/ 2225 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2226 if (ns == NULL) { 2227 return -ENOMEM; 2228 } 2229 2230 ns->active = true; 2231 2232 return 0; 2233 } 2234 2235 static void 2236 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2237 size_t max_entries) 2238 { 2239 uint32_t active_ns_count = 0; 2240 size_t i; 2241 uint32_t nsid; 2242 struct spdk_nvme_ns *ns, *tmp_ns; 2243 int rc; 2244 2245 /* First, remove namespaces that no longer exist */ 2246 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2247 nsid = new_ns_list[0]; 2248 active_ns_count = 0; 2249 while (nsid != 0) { 2250 if (nsid == ns->id) { 2251 break; 2252 } 2253 2254 nsid = new_ns_list[active_ns_count++]; 2255 } 2256 2257 if (nsid != ns->id) { 2258 /* Did not find this namespace id in the new list. */ 2259 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2260 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2261 } 2262 } 2263 2264 /* Next, add new namespaces */ 2265 active_ns_count = 0; 2266 for (i = 0; i < max_entries; i++) { 2267 nsid = new_ns_list[active_ns_count]; 2268 2269 if (nsid == 0) { 2270 break; 2271 } 2272 2273 /* If the namespace already exists, this will not construct it a second time. */ 2274 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2275 if (rc != 0) { 2276 /* We can't easily handle a failure here. But just move on. 
*/ 2277 assert(false); 2278 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2279 continue; 2280 } 2281 2282 active_ns_count++; 2283 } 2284 2285 ctrlr->active_ns_count = active_ns_count; 2286 } 2287 2288 static void 2289 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2290 { 2291 struct nvme_active_ns_ctx *ctx = arg; 2292 uint32_t *new_ns_list = NULL; 2293 2294 if (spdk_nvme_cpl_is_error(cpl)) { 2295 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2296 goto out; 2297 } 2298 2299 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2300 if (ctx->next_nsid == 0) { 2301 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2302 goto out; 2303 } 2304 2305 ctx->page_count++; 2306 new_ns_list = spdk_realloc(ctx->new_ns_list, 2307 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2308 ctx->ctrlr->page_size); 2309 if (!new_ns_list) { 2310 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2311 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2312 goto out; 2313 } 2314 2315 ctx->new_ns_list = new_ns_list; 2316 nvme_ctrlr_identify_active_ns_async(ctx); 2317 return; 2318 2319 out: 2320 if (ctx->deleter) { 2321 ctx->deleter(ctx); 2322 } 2323 } 2324 2325 static void 2326 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2327 { 2328 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2329 uint32_t i; 2330 int rc; 2331 2332 if (ctrlr->cdata.nn == 0) { 2333 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2334 goto out; 2335 } 2336 2337 assert(ctx->new_ns_list != NULL); 2338 2339 /* 2340 * If controller doesn't support active ns list CNS 0x02 dummy up 2341 * an active ns list, i.e. all namespaces report as active 2342 */ 2343 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2344 uint32_t *new_ns_list; 2345 2346 /* 2347 * Active NS list must always end with zero element. 2348 * So, we allocate for cdata.nn+1. 
2349 */ 2350 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2351 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2352 new_ns_list = spdk_realloc(ctx->new_ns_list, 2353 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2354 ctx->ctrlr->page_size); 2355 if (!new_ns_list) { 2356 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2357 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2358 goto out; 2359 } 2360 2361 ctx->new_ns_list = new_ns_list; 2362 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2363 for (i = 0; i < ctrlr->cdata.nn; i++) { 2364 ctx->new_ns_list[i] = i + 1; 2365 } 2366 2367 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2368 goto out; 2369 } 2370 2371 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2372 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2373 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2374 nvme_ctrlr_identify_active_ns_async_done, ctx); 2375 if (rc != 0) { 2376 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2377 goto out; 2378 } 2379 2380 return; 2381 2382 out: 2383 if (ctx->deleter) { 2384 ctx->deleter(ctx); 2385 } 2386 } 2387 2388 static void 2389 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2390 { 2391 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2392 struct spdk_nvme_ns *ns; 2393 2394 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2395 nvme_active_ns_ctx_destroy(ctx); 2396 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2397 return; 2398 } 2399 2400 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2401 2402 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2403 nvme_ns_free_iocs_specific_data(ns); 2404 } 2405 2406 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2407 nvme_active_ns_ctx_destroy(ctx); 2408 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2409 } 2410 2411 static void 2412 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr 
*ctrlr) 2413 { 2414 struct nvme_active_ns_ctx *ctx; 2415 2416 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2417 if (!ctx) { 2418 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2419 return; 2420 } 2421 2422 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2423 ctrlr->opts.admin_timeout_ms); 2424 nvme_ctrlr_identify_active_ns_async(ctx); 2425 } 2426 2427 int 2428 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2429 { 2430 struct nvme_active_ns_ctx *ctx; 2431 int rc; 2432 2433 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2434 if (!ctx) { 2435 return -ENOMEM; 2436 } 2437 2438 nvme_ctrlr_identify_active_ns_async(ctx); 2439 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2440 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2441 if (rc < 0) { 2442 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2443 break; 2444 } 2445 } 2446 2447 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2448 nvme_active_ns_ctx_destroy(ctx); 2449 return -ENXIO; 2450 } 2451 2452 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2453 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2454 nvme_active_ns_ctx_destroy(ctx); 2455 2456 return 0; 2457 } 2458 2459 static void 2460 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2461 { 2462 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2463 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2464 uint32_t nsid; 2465 int rc; 2466 2467 if (spdk_nvme_cpl_is_error(cpl)) { 2468 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2469 return; 2470 } 2471 2472 nvme_ns_set_identify_data(ns); 2473 2474 /* move on to the next active NS */ 2475 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2476 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2477 if (ns == NULL) { 2478 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2479 ctrlr->opts.admin_timeout_ms); 2480 
return; 2481 } 2482 ns->ctrlr = ctrlr; 2483 ns->id = nsid; 2484 2485 rc = nvme_ctrlr_identify_ns_async(ns); 2486 if (rc) { 2487 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2488 } 2489 } 2490 2491 static int 2492 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2493 { 2494 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2495 struct spdk_nvme_ns_data *nsdata; 2496 2497 nsdata = &ns->nsdata; 2498 2499 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2500 ctrlr->opts.admin_timeout_ms); 2501 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2502 nsdata, sizeof(*nsdata), 2503 nvme_ctrlr_identify_ns_async_done, ns); 2504 } 2505 2506 static int 2507 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2508 { 2509 uint32_t nsid; 2510 struct spdk_nvme_ns *ns; 2511 int rc; 2512 2513 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2514 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2515 if (ns == NULL) { 2516 /* No active NS, move on to the next state */ 2517 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2518 ctrlr->opts.admin_timeout_ms); 2519 return 0; 2520 } 2521 2522 ns->ctrlr = ctrlr; 2523 ns->id = nsid; 2524 2525 rc = nvme_ctrlr_identify_ns_async(ns); 2526 if (rc) { 2527 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2528 } 2529 2530 return rc; 2531 } 2532 2533 static int 2534 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2535 { 2536 uint32_t nsid; 2537 struct spdk_nvme_ns *ns; 2538 int rc; 2539 2540 if (!prev_nsid) { 2541 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2542 } else { 2543 /* move on to the next active NS */ 2544 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2545 } 2546 2547 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2548 if (ns == NULL) { 2549 /* No first/next active NS, move on to the next state */ 2550 nvme_ctrlr_set_state(ctrlr, 
NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2551 ctrlr->opts.admin_timeout_ms); 2552 return 0; 2553 } 2554 2555 /* loop until we find a ns which has (supported) iocs specific data */ 2556 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2557 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2558 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2559 if (ns == NULL) { 2560 /* no namespace with (supported) iocs specific data found */ 2561 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2562 ctrlr->opts.admin_timeout_ms); 2563 return 0; 2564 } 2565 } 2566 2567 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2568 if (rc) { 2569 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2570 } 2571 2572 return rc; 2573 } 2574 2575 static void 2576 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2577 { 2578 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2579 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2580 2581 if (spdk_nvme_cpl_is_error(cpl)) { 2582 nvme_ns_free_zns_specific_data(ns); 2583 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2584 return; 2585 } 2586 2587 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2588 } 2589 2590 static int 2591 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2592 { 2593 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2594 int rc; 2595 2596 switch (ns->csi) { 2597 case SPDK_NVME_CSI_ZNS: 2598 break; 2599 default: 2600 /* 2601 * This switch must handle all cases for which 2602 * nvme_ns_has_supported_iocs_specific_data() returns true, 2603 * other cases should never happen. 
2604 */ 2605 assert(0); 2606 } 2607 2608 assert(!ns->nsdata_zns); 2609 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2610 SPDK_MALLOC_SHARE); 2611 if (!ns->nsdata_zns) { 2612 return -ENOMEM; 2613 } 2614 2615 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2616 ctrlr->opts.admin_timeout_ms); 2617 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2618 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2619 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2620 if (rc) { 2621 nvme_ns_free_zns_specific_data(ns); 2622 } 2623 2624 return rc; 2625 } 2626 2627 static int 2628 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2629 { 2630 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2631 /* Multi IOCS not supported/enabled, move on to the next state */ 2632 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2633 ctrlr->opts.admin_timeout_ms); 2634 return 0; 2635 } 2636 2637 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2638 } 2639 2640 static void 2641 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2642 { 2643 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2644 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2645 uint32_t nsid; 2646 int rc; 2647 2648 if (spdk_nvme_cpl_is_error(cpl)) { 2649 /* 2650 * Many controllers claim to be compatible with NVMe 1.3, however, 2651 * they do not implement NS ID Desc List. Therefore, instead of setting 2652 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2653 * error and move on to the next state. 2654 * 2655 * The proper way is to create a new quirk for controllers that violate 2656 * the NVMe 1.3 spec by not supporting NS ID Desc List. 
2657 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2658 * it is too generic and was added in order to handle controllers that 2659 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 2660 */ 2661 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2662 ctrlr->opts.admin_timeout_ms); 2663 return; 2664 } 2665 2666 nvme_ns_set_id_desc_list_data(ns); 2667 2668 /* move on to the next active NS */ 2669 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2670 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2671 if (ns == NULL) { 2672 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2673 ctrlr->opts.admin_timeout_ms); 2674 return; 2675 } 2676 2677 rc = nvme_ctrlr_identify_id_desc_async(ns); 2678 if (rc) { 2679 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2680 } 2681 } 2682 2683 static int 2684 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2685 { 2686 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2687 2688 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2689 2690 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2691 ctrlr->opts.admin_timeout_ms); 2692 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2693 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2694 nvme_ctrlr_identify_id_desc_async_done, ns); 2695 } 2696 2697 static int 2698 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2699 { 2700 uint32_t nsid; 2701 struct spdk_nvme_ns *ns; 2702 int rc; 2703 2704 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2705 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2706 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2707 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2708 /* NS ID Desc List not supported, move on to the next state */ 2709 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2710 
ctrlr->opts.admin_timeout_ms); 2711 return 0; 2712 } 2713 2714 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2715 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2716 if (ns == NULL) { 2717 /* No active NS, move on to the next state */ 2718 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2719 ctrlr->opts.admin_timeout_ms); 2720 return 0; 2721 } 2722 2723 rc = nvme_ctrlr_identify_id_desc_async(ns); 2724 if (rc) { 2725 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2726 } 2727 2728 return rc; 2729 } 2730 2731 static void 2732 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2733 { 2734 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA || 2735 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || 2736 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) { 2737 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2738 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2739 ctrlr->cdata.nvmf_specific.ioccsz); 2740 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2741 assert(0); 2742 } 2743 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2744 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2745 } 2746 } 2747 2748 static void 2749 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2750 { 2751 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2752 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2753 2754 if (spdk_nvme_cpl_is_error(cpl)) { 2755 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2756 ctrlr->opts.num_io_queues = 0; 2757 } else { 2758 /* 2759 * Data in cdw0 is 0-based. 2760 * Lower 16-bits indicate number of submission queues allocated. 2761 * Upper 16-bits indicate number of completion queues allocated. 
2762 */ 2763 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2764 cq_allocated = (cpl->cdw0 >> 16) + 1; 2765 2766 /* 2767 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2768 * submission and completion queues. 2769 */ 2770 min_allocated = spdk_min(sq_allocated, cq_allocated); 2771 2772 /* Set number of queues to be minimum of requested and actually allocated. */ 2773 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2774 } 2775 2776 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2777 if (ctrlr->free_io_qids == NULL) { 2778 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2779 return; 2780 } 2781 2782 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2783 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2784 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2785 } 2786 2787 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2788 ctrlr->opts.admin_timeout_ms); 2789 } 2790 2791 static int 2792 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2793 { 2794 int rc; 2795 2796 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2797 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2798 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2799 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2800 } else if (ctrlr->opts.num_io_queues < 1) { 2801 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2802 ctrlr->opts.num_io_queues = 1; 2803 } 2804 2805 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2806 ctrlr->opts.admin_timeout_ms); 2807 2808 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2809 nvme_ctrlr_set_num_queues_done, ctrlr); 2810 if (rc != 0) { 2811 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2812 return rc; 2813 } 2814 2815 return 0; 2816 } 2817 2818 static void 2819 
nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2820 { 2821 uint32_t keep_alive_interval_us; 2822 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2823 2824 if (spdk_nvme_cpl_is_error(cpl)) { 2825 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2826 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2827 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2828 } else { 2829 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2830 cpl->status.sc, cpl->status.sct); 2831 ctrlr->opts.keep_alive_timeout_ms = 0; 2832 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2833 return; 2834 } 2835 } else { 2836 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2837 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2838 cpl->cdw0); 2839 } 2840 2841 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2842 } 2843 2844 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2845 ctrlr->keep_alive_interval_ticks = 0; 2846 } else { 2847 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2848 2849 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2850 2851 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2852 UINT64_C(1000000); 2853 2854 /* Schedule the first Keep Alive to be sent as soon as possible. 
*/ 2855 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2856 } 2857 2858 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2859 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2860 } else { 2861 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2862 ctrlr->opts.admin_timeout_ms); 2863 } 2864 } 2865 2866 static int 2867 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2868 { 2869 int rc; 2870 2871 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2872 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2873 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2874 } else { 2875 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2876 ctrlr->opts.admin_timeout_ms); 2877 } 2878 return 0; 2879 } 2880 2881 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2882 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2883 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2884 ctrlr->opts.keep_alive_timeout_ms = 0; 2885 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2886 ctrlr->opts.admin_timeout_ms); 2887 return 0; 2888 } 2889 2890 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2891 ctrlr->opts.admin_timeout_ms); 2892 2893 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. 
*/ 2894 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2895 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2896 if (rc != 0) { 2897 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2898 ctrlr->opts.keep_alive_timeout_ms = 0; 2899 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2900 return rc; 2901 } 2902 2903 return 0; 2904 } 2905 2906 static void 2907 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2908 { 2909 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2910 2911 if (spdk_nvme_cpl_is_error(cpl)) { 2912 /* 2913 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2914 * is optional. 2915 */ 2916 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2917 cpl->status.sc, cpl->status.sct); 2918 } else { 2919 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2920 } 2921 2922 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2923 } 2924 2925 static int 2926 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2927 { 2928 uint8_t *host_id; 2929 uint32_t host_id_size; 2930 int rc; 2931 2932 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2933 /* 2934 * NVMe-oF sends the host ID during Connect and doesn't allow 2935 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 
/*
 * (Re)construct the namespace object of every active namespace.
 *
 * Walks the active namespace IDs from first to last and calls
 * nvme_ns_construct() on each namespace object. A namespace whose object
 * cannot be obtained is logged and skipped instead of being dereferenced.
 */
void
nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nsid;
	struct spdk_nvme_ns *ns;

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
	     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		if (ns == NULL) {
			/*
			 * spdk_nvme_ctrlr_get_ns() can return NULL (its other callers in
			 * this file check for it); never hand NULL to nvme_ns_construct().
			 */
			NVME_CTRLR_ERRLOG(ctrlr, "Failed to get namespace object for nsid %u\n", nsid);
			continue;
		}

		nvme_ns_construct(ns, nsid, ctrlr);
	}
}
*status; 2991 int rc = -ENOMEM; 2992 char *buffer = NULL; 2993 uint32_t nsid; 2994 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 2995 2996 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 2997 if (!buffer) { 2998 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 2999 "changed ns log.\n"); 3000 return rc; 3001 } 3002 3003 status = calloc(1, sizeof(*status)); 3004 if (!status) { 3005 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3006 goto free_buffer; 3007 } 3008 3009 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3010 SPDK_NVME_LOG_CHANGED_NS_LIST, 3011 SPDK_NVME_GLOBAL_NS_TAG, 3012 buffer, buf_size, 0, 3013 nvme_completion_poll_cb, status); 3014 3015 if (rc) { 3016 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3017 free(status); 3018 goto free_buffer; 3019 } 3020 3021 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3022 ctrlr->opts.admin_timeout_ms * 1000); 3023 if (!status->timed_out) { 3024 free(status); 3025 } 3026 3027 if (rc) { 3028 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3029 goto free_buffer; 3030 } 3031 3032 /* only check the case of overflow. 
*/ 3033 nsid = from_le32(buffer); 3034 if (nsid == 0xffffffffu) { 3035 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3036 } 3037 3038 free_buffer: 3039 spdk_dma_free(buffer); 3040 return rc; 3041 } 3042 3043 void 3044 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3045 const struct spdk_nvme_cpl *cpl) 3046 { 3047 union spdk_nvme_async_event_completion event; 3048 struct spdk_nvme_ctrlr_process *active_proc; 3049 int rc; 3050 3051 event.raw = cpl->cdw0; 3052 3053 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3054 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3055 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3056 3057 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3058 if (rc) { 3059 return; 3060 } 3061 nvme_ctrlr_update_namespaces(ctrlr); 3062 nvme_io_msg_ctrlr_update(ctrlr); 3063 } 3064 3065 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3066 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3067 if (!ctrlr->opts.disable_read_ana_log_page) { 3068 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3069 if (rc) { 3070 return; 3071 } 3072 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3073 ctrlr); 3074 } 3075 } 3076 3077 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3078 if (active_proc && active_proc->aer_cb_fn) { 3079 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3080 } 3081 } 3082 3083 static void 3084 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3085 const struct spdk_nvme_cpl *cpl) 3086 { 3087 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3088 struct spdk_nvme_ctrlr_process *proc; 3089 3090 /* Add async event to each process objects event list */ 3091 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3092 /* Must be shared memory so other processes can access */ 3093 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3094 if (!nvme_event) { 
3095 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3096 return; 3097 } 3098 nvme_event->cpl = *cpl; 3099 3100 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3101 } 3102 } 3103 3104 void 3105 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3106 { 3107 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3108 struct spdk_nvme_ctrlr_process *active_proc; 3109 3110 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3111 3112 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3113 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3114 spdk_nvme_ctrlr_aer_completion_list, link); 3115 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3116 spdk_free(nvme_event); 3117 3118 } 3119 } 3120 3121 static void 3122 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3123 { 3124 struct nvme_async_event_request *aer = arg; 3125 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3126 3127 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3128 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3129 /* 3130 * This is simulated when controller is being shut down, to 3131 * effectively abort outstanding asynchronous event requests 3132 * and make sure all memory is freed. Do not repost the 3133 * request in this case. 3134 */ 3135 return; 3136 } 3137 3138 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3139 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3140 /* 3141 * SPDK will only send as many AERs as the device says it supports, 3142 * so this status code indicates an out-of-spec device. Do not repost 3143 * the request in this case. 3144 */ 3145 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3146 "handling. 
Do not repost this AER.\n"); 3147 return; 3148 } 3149 3150 /* Add the events to the list */ 3151 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3152 3153 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3154 if (ctrlr->is_removed || ctrlr->is_destructed) { 3155 return; 3156 } 3157 3158 /* 3159 * Repost another asynchronous event request to replace the one 3160 * that just completed. 3161 */ 3162 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3163 /* 3164 * We can't do anything to recover from a failure here, 3165 * so just print a warning message and leave the AER unsubmitted. 3166 */ 3167 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3168 } 3169 } 3170 3171 static int 3172 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3173 struct nvme_async_event_request *aer) 3174 { 3175 struct nvme_request *req; 3176 3177 aer->ctrlr = ctrlr; 3178 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3179 aer->req = req; 3180 if (req == NULL) { 3181 return -1; 3182 } 3183 3184 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3185 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3186 } 3187 3188 static void 3189 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3190 { 3191 struct nvme_async_event_request *aer; 3192 int rc; 3193 uint32_t i; 3194 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3195 3196 if (spdk_nvme_cpl_is_error(cpl)) { 3197 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3198 ctrlr->num_aers = 0; 3199 } else { 3200 /* aerl is a zero-based value, so we need to add 1 here. 
*/ 3201 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3202 } 3203 3204 for (i = 0; i < ctrlr->num_aers; i++) { 3205 aer = &ctrlr->aer[i]; 3206 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3207 if (rc) { 3208 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3209 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3210 return; 3211 } 3212 } 3213 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3214 } 3215 3216 static int 3217 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3218 { 3219 union spdk_nvme_feat_async_event_configuration config; 3220 int rc; 3221 3222 config.raw = 0; 3223 3224 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3225 config.bits.discovery_log_change_notice = 1; 3226 } else { 3227 config.bits.crit_warn.bits.available_spare = 1; 3228 config.bits.crit_warn.bits.temperature = 1; 3229 config.bits.crit_warn.bits.device_reliability = 1; 3230 config.bits.crit_warn.bits.read_only = 1; 3231 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3232 3233 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3234 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3235 config.bits.ns_attr_notice = 1; 3236 } 3237 if (ctrlr->cdata.oaes.fw_activation_notices) { 3238 config.bits.fw_activation_notice = 1; 3239 } 3240 if (ctrlr->cdata.oaes.ana_change_notices) { 3241 config.bits.ana_change_notice = 1; 3242 } 3243 } 3244 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3245 config.bits.telemetry_log_notice = 1; 3246 } 3247 } 3248 3249 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3250 ctrlr->opts.admin_timeout_ms); 3251 3252 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3253 nvme_ctrlr_configure_aer_done, 3254 ctrlr); 3255 if (rc != 0) { 3256 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3257 return rc; 3258 } 3259 3260 return 0; 3261 } 
3262 3263 struct spdk_nvme_ctrlr_process * 3264 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3265 { 3266 struct spdk_nvme_ctrlr_process *active_proc; 3267 3268 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3269 if (active_proc->pid == pid) { 3270 return active_proc; 3271 } 3272 } 3273 3274 return NULL; 3275 } 3276 3277 struct spdk_nvme_ctrlr_process * 3278 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3279 { 3280 return nvme_ctrlr_get_process(ctrlr, getpid()); 3281 } 3282 3283 /** 3284 * This function will be called when a process is using the controller. 3285 * 1. For the primary process, it is called when constructing the controller. 3286 * 2. For the secondary process, it is called at probing the controller. 3287 * Note: will check whether the process is already added for the same process. 3288 */ 3289 int 3290 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3291 { 3292 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3293 pid_t pid = getpid(); 3294 3295 /* Check whether the process is already added or not */ 3296 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3297 return 0; 3298 } 3299 3300 /* Initialize the per process properties for this ctrlr */ 3301 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3302 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3303 if (ctrlr_proc == NULL) { 3304 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3305 3306 return -1; 3307 } 3308 3309 ctrlr_proc->is_primary = spdk_process_is_primary(); 3310 ctrlr_proc->pid = pid; 3311 STAILQ_INIT(&ctrlr_proc->active_reqs); 3312 ctrlr_proc->devhandle = devhandle; 3313 ctrlr_proc->ref = 0; 3314 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3315 STAILQ_INIT(&ctrlr_proc->async_events); 3316 3317 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3318 3319 return 0; 3320 } 3321 3322 /** 3323 * This function will be called when the process detaches the controller. 
3324 * Note: the ctrlr_lock must be held when calling this function. 3325 */ 3326 static void 3327 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3328 struct spdk_nvme_ctrlr_process *proc) 3329 { 3330 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3331 3332 assert(STAILQ_EMPTY(&proc->active_reqs)); 3333 3334 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3335 spdk_nvme_ctrlr_free_io_qpair(qpair); 3336 } 3337 3338 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3339 3340 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3341 spdk_pci_device_detach(proc->devhandle); 3342 } 3343 3344 spdk_free(proc); 3345 } 3346 3347 /** 3348 * This function will be called when the process exited unexpectedly 3349 * in order to free any incomplete nvme request, allocated IO qpairs 3350 * and allocated memory. 3351 * Note: the ctrlr_lock must be held when calling this function. 3352 */ 3353 static void 3354 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3355 { 3356 struct nvme_request *req, *tmp_req; 3357 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3358 struct spdk_nvme_ctrlr_aer_completion_list *event; 3359 3360 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3361 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3362 3363 assert(req->pid == proc->pid); 3364 3365 nvme_free_request(req); 3366 } 3367 3368 /* Remove async event from each process objects event list */ 3369 while (!STAILQ_EMPTY(&proc->async_events)) { 3370 event = STAILQ_FIRST(&proc->async_events); 3371 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3372 spdk_free(event); 3373 } 3374 3375 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3376 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3377 3378 /* 3379 * The process may have been killed while some qpairs were in their 3380 * completion context. Clear that flag here to allow these IO 3381 * qpairs to be deleted. 
3382 */ 3383 qpair->in_completion_context = 0; 3384 3385 qpair->no_deletion_notification_needed = 1; 3386 3387 spdk_nvme_ctrlr_free_io_qpair(qpair); 3388 } 3389 3390 spdk_free(proc); 3391 } 3392 3393 /** 3394 * This function will be called when destructing the controller. 3395 * 1. There is no more admin request on this controller. 3396 * 2. Clean up any left resource allocation when its associated process is gone. 3397 */ 3398 void 3399 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3400 { 3401 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3402 3403 /* Free all the processes' properties and make sure no pending admin IOs */ 3404 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3405 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3406 3407 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3408 3409 spdk_free(active_proc); 3410 } 3411 } 3412 3413 /** 3414 * This function will be called when any other process attaches or 3415 * detaches the controller in order to cleanup those unexpectedly 3416 * terminated processes. 3417 * Note: the ctrlr_lock must be held when calling this function. 
3418 */ 3419 static int 3420 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3421 { 3422 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3423 int active_proc_count = 0; 3424 3425 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3426 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3427 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3428 3429 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3430 3431 nvme_ctrlr_cleanup_process(active_proc); 3432 } else { 3433 active_proc_count++; 3434 } 3435 } 3436 3437 return active_proc_count; 3438 } 3439 3440 void 3441 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3442 { 3443 struct spdk_nvme_ctrlr_process *active_proc; 3444 3445 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3446 3447 nvme_ctrlr_remove_inactive_proc(ctrlr); 3448 3449 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3450 if (active_proc) { 3451 active_proc->ref++; 3452 } 3453 3454 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3455 } 3456 3457 void 3458 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3459 { 3460 struct spdk_nvme_ctrlr_process *active_proc; 3461 int proc_count; 3462 3463 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3464 3465 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3466 3467 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3468 if (active_proc) { 3469 active_proc->ref--; 3470 assert(active_proc->ref >= 0); 3471 3472 /* 3473 * The last active process will be removed at the end of 3474 * the destruction of the controller. 
3475 */ 3476 if (active_proc->ref == 0 && proc_count != 1) { 3477 nvme_ctrlr_remove_process(ctrlr, active_proc); 3478 } 3479 } 3480 3481 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3482 } 3483 3484 int 3485 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3486 { 3487 struct spdk_nvme_ctrlr_process *active_proc; 3488 int ref = 0; 3489 3490 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3491 3492 nvme_ctrlr_remove_inactive_proc(ctrlr); 3493 3494 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3495 ref += active_proc->ref; 3496 } 3497 3498 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3499 3500 return ref; 3501 } 3502 3503 /** 3504 * Get the PCI device handle which is only visible to its associated process. 3505 */ 3506 struct spdk_pci_device * 3507 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3508 { 3509 struct spdk_nvme_ctrlr_process *active_proc; 3510 struct spdk_pci_device *devhandle = NULL; 3511 3512 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3513 3514 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3515 if (active_proc) { 3516 devhandle = active_proc->devhandle; 3517 } 3518 3519 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3520 3521 return devhandle; 3522 } 3523 3524 static void 3525 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3526 { 3527 struct spdk_nvme_ctrlr *ctrlr = ctx; 3528 3529 if (spdk_nvme_cpl_is_error(cpl)) { 3530 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3531 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3532 return; 3533 } 3534 3535 assert(value <= UINT32_MAX); 3536 ctrlr->vs.raw = (uint32_t)value; 3537 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3538 } 3539 3540 static void 3541 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3542 { 3543 struct spdk_nvme_ctrlr *ctrlr = ctx; 3544 3545 if (spdk_nvme_cpl_is_error(cpl)) { 3546 
NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3547 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3548 return; 3549 } 3550 3551 ctrlr->cap.raw = value; 3552 nvme_ctrlr_init_cap(ctrlr); 3553 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3554 } 3555 3556 static void 3557 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3558 { 3559 struct spdk_nvme_ctrlr *ctrlr = ctx; 3560 enum nvme_ctrlr_state state; 3561 3562 if (spdk_nvme_cpl_is_error(cpl)) { 3563 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3564 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3565 return; 3566 } 3567 3568 assert(value <= UINT32_MAX); 3569 ctrlr->process_init_cc.raw = (uint32_t)value; 3570 3571 if (ctrlr->process_init_cc.bits.en) { 3572 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3573 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3574 } else { 3575 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3576 } 3577 3578 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3579 } 3580 3581 static void 3582 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3583 { 3584 struct spdk_nvme_ctrlr *ctrlr = ctx; 3585 3586 if (spdk_nvme_cpl_is_error(cpl)) { 3587 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3588 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3589 return; 3590 } 3591 3592 /* 3593 * Wait 2.5 seconds before accessing PCI registers. 3594 * Not using sleep() to avoid blocking other controller's initialization. 
3595 */ 3596 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3597 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3598 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3599 } 3600 3601 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3602 nvme_ctrlr_get_ready_timeout(ctrlr)); 3603 } 3604 3605 static void 3606 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3607 { 3608 struct spdk_nvme_ctrlr *ctrlr = ctx; 3609 union spdk_nvme_cc_register cc; 3610 int rc; 3611 3612 if (spdk_nvme_cpl_is_error(cpl)) { 3613 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3614 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3615 return; 3616 } 3617 3618 assert(value <= UINT32_MAX); 3619 cc.raw = (uint32_t)value; 3620 cc.bits.en = 0; 3621 ctrlr->process_init_cc.raw = cc.raw; 3622 3623 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3624 nvme_ctrlr_get_ready_timeout(ctrlr)); 3625 3626 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3627 if (rc != 0) { 3628 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3629 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3630 } 3631 } 3632 3633 static void 3634 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3635 { 3636 struct spdk_nvme_ctrlr *ctrlr = ctx; 3637 union spdk_nvme_csts_register csts; 3638 3639 if (spdk_nvme_cpl_is_error(cpl)) { 3640 /* While a device is resetting, it may be unable to service MMIO reads 3641 * temporarily. Allow for this case. 
3642 */ 3643 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3644 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3645 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3646 NVME_TIMEOUT_KEEP_EXISTING); 3647 } else { 3648 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3649 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3650 } 3651 3652 return; 3653 } 3654 3655 assert(value <= UINT32_MAX); 3656 csts.raw = (uint32_t)value; 3657 if (csts.bits.rdy == 1) { 3658 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3659 nvme_ctrlr_get_ready_timeout(ctrlr)); 3660 } else { 3661 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3662 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3663 NVME_TIMEOUT_KEEP_EXISTING); 3664 } 3665 } 3666 3667 static void 3668 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3669 { 3670 struct spdk_nvme_ctrlr *ctrlr = ctx; 3671 union spdk_nvme_csts_register csts; 3672 3673 if (spdk_nvme_cpl_is_error(cpl)) { 3674 /* While a device is resetting, it may be unable to service MMIO reads 3675 * temporarily. Allow for this case. 
3676 */ 3677 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3678 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3679 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3680 NVME_TIMEOUT_KEEP_EXISTING); 3681 } else { 3682 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3683 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3684 } 3685 3686 return; 3687 } 3688 3689 assert(value <= UINT32_MAX); 3690 csts.raw = (uint32_t)value; 3691 if (csts.bits.rdy == 0) { 3692 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3693 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED, 3694 nvme_ctrlr_get_ready_timeout(ctrlr)); 3695 } else { 3696 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3697 NVME_TIMEOUT_KEEP_EXISTING); 3698 } 3699 } 3700 3701 static void 3702 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3703 const struct spdk_nvme_cpl *cpl) 3704 { 3705 struct spdk_nvme_ctrlr *ctrlr = ctx; 3706 union spdk_nvme_csts_register csts; 3707 3708 if (spdk_nvme_cpl_is_error(cpl)) { 3709 /* While a device is resetting, it may be unable to service MMIO reads 3710 * temporarily. Allow for this case. 3711 */ 3712 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3713 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3714 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3715 NVME_TIMEOUT_KEEP_EXISTING); 3716 } else { 3717 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3718 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3719 } 3720 3721 return; 3722 } 3723 3724 assert(value <= UINT32_MAX); 3725 csts.raw = value; 3726 if (csts.bits.rdy == 1) { 3727 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3728 /* 3729 * The controller has been enabled. 
3730 * Perform the rest of initialization serially. 3731 */ 3732 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3733 ctrlr->opts.admin_timeout_ms); 3734 } else { 3735 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3736 NVME_TIMEOUT_KEEP_EXISTING); 3737 } 3738 } 3739 3740 /** 3741 * This function will be called repeatedly during initialization until the controller is ready. 3742 */ 3743 int 3744 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3745 { 3746 uint32_t ready_timeout_in_ms; 3747 uint64_t ticks; 3748 int rc = 0; 3749 3750 ticks = spdk_get_ticks(); 3751 3752 /* 3753 * May need to avoid accessing any register on the target controller 3754 * for a while. Return early without touching the FSM. 3755 * Check sleep_timeout_tsc > 0 for unit test. 3756 */ 3757 if ((ctrlr->sleep_timeout_tsc > 0) && 3758 (ticks <= ctrlr->sleep_timeout_tsc)) { 3759 return 0; 3760 } 3761 ctrlr->sleep_timeout_tsc = 0; 3762 3763 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3764 3765 /* 3766 * Check if the current initialization step is done or has timed out. 3767 */ 3768 switch (ctrlr->state) { 3769 case NVME_CTRLR_STATE_INIT_DELAY: 3770 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3771 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3772 /* 3773 * Controller may need some delay before it's enabled. 3774 * 3775 * This is a workaround for an issue where the PCIe-attached NVMe controller 3776 * is not ready after VFIO reset. We delay the initialization rather than the 3777 * enabling itself, because this is required only for the very first enabling 3778 * - directly after a VFIO reset. 
3779 */ 3780 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3781 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3782 } 3783 break; 3784 3785 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3786 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3787 if (rc == 0) { 3788 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3789 NVME_TIMEOUT_INFINITE); 3790 } else { 3791 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3792 } 3793 break; 3794 3795 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3796 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3797 3798 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3799 case NVME_QPAIR_CONNECTING: 3800 break; 3801 case NVME_QPAIR_CONNECTED: 3802 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3803 /* Fall through */ 3804 case NVME_QPAIR_ENABLED: 3805 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3806 NVME_TIMEOUT_INFINITE); 3807 /* Abort any queued requests that were sent while the adminq was connecting 3808 * to avoid stalling the init process during a reset, as requests don't get 3809 * resubmitted while the controller is resetting and subsequent commands 3810 * would get queued too. 
3811 */ 3812 nvme_qpair_abort_queued_reqs(ctrlr->adminq, 0); 3813 break; 3814 case NVME_QPAIR_DISCONNECTING: 3815 assert(ctrlr->adminq->async == true); 3816 break; 3817 case NVME_QPAIR_DISCONNECTED: 3818 /* fallthrough */ 3819 default: 3820 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3821 break; 3822 } 3823 3824 break; 3825 3826 case NVME_CTRLR_STATE_READ_VS: 3827 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3828 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3829 break; 3830 3831 case NVME_CTRLR_STATE_READ_CAP: 3832 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3833 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3834 break; 3835 3836 case NVME_CTRLR_STATE_CHECK_EN: 3837 /* Begin the hardware initialization by making sure the controller is disabled. */ 3838 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3839 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3840 break; 3841 3842 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3843 /* 3844 * Controller is currently enabled. We need to disable it to cause a reset. 3845 * 3846 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3847 * Wait for the ready bit to be 1 before disabling the controller. 
3848 */ 3849 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3850 NVME_TIMEOUT_KEEP_EXISTING); 3851 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3852 break; 3853 3854 case NVME_CTRLR_STATE_SET_EN_0: 3855 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3856 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3857 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3858 break; 3859 3860 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3861 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3862 NVME_TIMEOUT_KEEP_EXISTING); 3863 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3864 break; 3865 3866 case NVME_CTRLR_STATE_DISABLED: 3867 if (ctrlr->is_disconnecting) { 3868 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n"); 3869 } else { 3870 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3871 3872 /* 3873 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3874 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 
3875 */ 3876 spdk_delay_us(100); 3877 } 3878 break; 3879 3880 case NVME_CTRLR_STATE_ENABLE: 3881 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3882 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3883 rc = nvme_ctrlr_enable(ctrlr); 3884 if (rc) { 3885 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d", rc); 3886 } 3887 return rc; 3888 3889 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3890 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3891 NVME_TIMEOUT_KEEP_EXISTING); 3892 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3893 ctrlr); 3894 break; 3895 3896 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3897 nvme_transport_qpair_reset(ctrlr->adminq); 3898 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3899 break; 3900 3901 case NVME_CTRLR_STATE_IDENTIFY: 3902 rc = nvme_ctrlr_identify(ctrlr); 3903 break; 3904 3905 case NVME_CTRLR_STATE_CONFIGURE_AER: 3906 rc = nvme_ctrlr_configure_aer(ctrlr); 3907 break; 3908 3909 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3910 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3911 break; 3912 3913 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3914 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3915 break; 3916 3917 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3918 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3919 break; 3920 3921 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3922 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3923 rc = nvme_ctrlr_set_num_queues(ctrlr); 3924 break; 3925 3926 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3927 _nvme_ctrlr_identify_active_ns(ctrlr); 3928 break; 3929 3930 case NVME_CTRLR_STATE_IDENTIFY_NS: 3931 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3932 break; 3933 3934 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3935 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3936 break; 3937 3938 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3939 rc = 
nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3940 break; 3941 3942 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3943 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3944 break; 3945 3946 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3947 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3948 break; 3949 3950 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3951 nvme_ctrlr_set_supported_features(ctrlr); 3952 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3953 ctrlr->opts.admin_timeout_ms); 3954 break; 3955 3956 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3957 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3958 break; 3959 3960 case NVME_CTRLR_STATE_SET_HOST_ID: 3961 rc = nvme_ctrlr_set_host_id(ctrlr); 3962 break; 3963 3964 case NVME_CTRLR_STATE_READY: 3965 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3966 return 0; 3967 3968 case NVME_CTRLR_STATE_ERROR: 3969 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3970 return -1; 3971 3972 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 3973 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 3974 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 3975 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 3976 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3977 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 3978 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 3979 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3980 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 3981 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 3982 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 3983 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 3984 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 3985 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 3986 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 3987 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 3988 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 3989 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 3990 case 
NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 3991 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 3992 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 3993 /* 3994 * nvme_ctrlr_process_init() may be called from the completion context 3995 * for the admin qpair. Avoid recursive calls for this case. 3996 */ 3997 if (!ctrlr->adminq->in_completion_context) { 3998 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3999 } 4000 break; 4001 4002 default: 4003 assert(0); 4004 return -1; 4005 } 4006 4007 if (rc) { 4008 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d", 4009 rc, ctrlr->state); 4010 } 4011 4012 /* Note: we use the ticks captured when we entered this function. 4013 * This covers environments where the SPDK process gets swapped out after 4014 * we tried to advance the state but before we check the timeout here. 4015 * It is not normal for this to happen, but harmless to handle it in this 4016 * way. 4017 */ 4018 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4019 ticks > ctrlr->state_timeout_tsc) { 4020 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4021 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4022 return -1; 4023 } 4024 4025 return rc; 4026 } 4027 4028 int 4029 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4030 { 4031 pthread_mutexattr_t attr; 4032 int rc = 0; 4033 4034 if (pthread_mutexattr_init(&attr)) { 4035 return -1; 4036 } 4037 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4038 #ifndef __FreeBSD__ 4039 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4040 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4041 #endif 4042 pthread_mutex_init(mtx, &attr)) { 4043 rc = -1; 4044 } 4045 pthread_mutexattr_destroy(&attr); 4046 return rc; 4047 } 4048 4049 int 4050 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4051 { 4052 int rc; 4053 4054 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4055 
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4056 } else { 4057 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4058 } 4059 4060 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4061 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4062 ctrlr->opts.admin_queue_size); 4063 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4064 } 4065 4066 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4067 NVME_CTRLR_ERRLOG(ctrlr, 4068 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4069 ctrlr->opts.admin_queue_size); 4070 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4071 } 4072 4073 ctrlr->flags = 0; 4074 ctrlr->free_io_qids = NULL; 4075 ctrlr->is_resetting = false; 4076 ctrlr->is_failed = false; 4077 ctrlr->is_destructed = false; 4078 4079 TAILQ_INIT(&ctrlr->active_io_qpairs); 4080 STAILQ_INIT(&ctrlr->queued_aborts); 4081 ctrlr->outstanding_aborts = 0; 4082 4083 ctrlr->ana_log_page = NULL; 4084 ctrlr->ana_log_page_size = 0; 4085 4086 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4087 if (rc != 0) { 4088 return rc; 4089 } 4090 4091 TAILQ_INIT(&ctrlr->active_procs); 4092 STAILQ_INIT(&ctrlr->register_operations); 4093 4094 RB_INIT(&ctrlr->ns); 4095 4096 return rc; 4097 } 4098 4099 static void 4100 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4101 { 4102 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4103 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4104 } 4105 4106 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4107 4108 /* For now, always select page_size == min_page_size. 
*/ 4109 ctrlr->page_size = ctrlr->min_page_size; 4110 4111 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4112 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4113 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4114 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4115 /* If the user specifically set an IO queue size different than the 4116 * default, use that value. Otherwise overwrite with the quirked value. 4117 * This allows this quirk to be overridden when necessary. 4118 * However, cap.mqes still needs to be respected. 4119 */ 4120 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4121 } 4122 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4123 4124 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4125 } 4126 4127 void 4128 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4129 { 4130 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4131 } 4132 4133 void 4134 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4135 struct nvme_ctrlr_detach_ctx *ctx) 4136 { 4137 struct spdk_nvme_qpair *qpair, *tmp; 4138 4139 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4140 4141 ctrlr->prepare_for_reset = false; 4142 ctrlr->is_destructed = true; 4143 4144 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4145 4146 nvme_ctrlr_abort_queued_aborts(ctrlr); 4147 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4148 4149 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4150 spdk_nvme_ctrlr_free_io_qpair(qpair); 4151 } 4152 4153 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4154 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4155 4156 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4157 } 4158 4159 int 4160 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4161 struct nvme_ctrlr_detach_ctx *ctx) 4162 { 4163 struct spdk_nvme_ns *ns, *tmp_ns; 
4164 int rc = 0; 4165 4166 if (!ctx->shutdown_complete) { 4167 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4168 if (rc == -EAGAIN) { 4169 return -EAGAIN; 4170 } 4171 /* Destruct ctrlr forcefully for any other error. */ 4172 } 4173 4174 if (ctx->cb_fn) { 4175 ctx->cb_fn(ctrlr); 4176 } 4177 4178 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4179 4180 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4181 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4182 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4183 spdk_free(ns); 4184 } 4185 4186 ctrlr->active_ns_count = 0; 4187 4188 spdk_bit_array_free(&ctrlr->free_io_qids); 4189 4190 free(ctrlr->ana_log_page); 4191 free(ctrlr->copied_ana_desc); 4192 ctrlr->ana_log_page = NULL; 4193 ctrlr->copied_ana_desc = NULL; 4194 ctrlr->ana_log_page_size = 0; 4195 4196 nvme_transport_ctrlr_destruct(ctrlr); 4197 4198 return rc; 4199 } 4200 4201 void 4202 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4203 { 4204 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4205 int rc; 4206 4207 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4208 4209 while (1) { 4210 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4211 if (rc != -EAGAIN) { 4212 break; 4213 } 4214 nvme_delay(1000); 4215 } 4216 } 4217 4218 int 4219 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4220 struct nvme_request *req) 4221 { 4222 return nvme_qpair_submit_request(ctrlr->adminq, req); 4223 } 4224 4225 static void 4226 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4227 { 4228 /* Do nothing */ 4229 } 4230 4231 /* 4232 * Check if we need to send a Keep Alive command. 4233 * Caller must hold ctrlr->ctrlr_lock. 
4234 */ 4235 static int 4236 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4237 { 4238 uint64_t now; 4239 struct nvme_request *req; 4240 struct spdk_nvme_cmd *cmd; 4241 int rc = 0; 4242 4243 now = spdk_get_ticks(); 4244 if (now < ctrlr->next_keep_alive_tick) { 4245 return rc; 4246 } 4247 4248 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4249 if (req == NULL) { 4250 return rc; 4251 } 4252 4253 cmd = &req->cmd; 4254 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4255 4256 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4257 if (rc != 0) { 4258 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4259 rc = -ENXIO; 4260 } 4261 4262 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4263 return rc; 4264 } 4265 4266 int32_t 4267 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4268 { 4269 int32_t num_completions; 4270 int32_t rc; 4271 struct spdk_nvme_ctrlr_process *active_proc; 4272 4273 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4274 4275 if (ctrlr->keep_alive_interval_ticks) { 4276 rc = nvme_ctrlr_keep_alive(ctrlr); 4277 if (rc) { 4278 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4279 return rc; 4280 } 4281 } 4282 4283 rc = nvme_io_msg_process(ctrlr); 4284 if (rc < 0) { 4285 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4286 return rc; 4287 } 4288 num_completions = rc; 4289 4290 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4291 4292 /* Each process has an async list, complete the ones for this process object */ 4293 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4294 if (active_proc) { 4295 nvme_ctrlr_complete_queued_async_events(ctrlr); 4296 } 4297 4298 if (rc == -ENXIO && ctrlr->is_disconnecting) { 4299 nvme_ctrlr_disconnect_done(ctrlr); 4300 } 4301 4302 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4303 4304 if (rc < 0) { 4305 num_completions = rc; 4306 } else { 4307 num_completions += rc; 4308 } 4309 4310 return num_completions; 4311 } 4312 4313 const 
struct spdk_nvme_ctrlr_data * 4314 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4315 { 4316 return &ctrlr->cdata; 4317 } 4318 4319 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4320 { 4321 union spdk_nvme_csts_register csts; 4322 4323 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4324 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4325 } 4326 return csts; 4327 } 4328 4329 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4330 { 4331 union spdk_nvme_cc_register cc; 4332 4333 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4334 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4335 } 4336 return cc; 4337 } 4338 4339 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4340 { 4341 return ctrlr->cap; 4342 } 4343 4344 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4345 { 4346 return ctrlr->vs; 4347 } 4348 4349 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4350 { 4351 union spdk_nvme_cmbsz_register cmbsz; 4352 4353 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4354 cmbsz.raw = 0; 4355 } 4356 4357 return cmbsz; 4358 } 4359 4360 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4361 { 4362 union spdk_nvme_pmrcap_register pmrcap; 4363 4364 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4365 pmrcap.raw = 0; 4366 } 4367 4368 return pmrcap; 4369 } 4370 4371 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4372 { 4373 union spdk_nvme_bpinfo_register bpinfo; 4374 4375 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4376 bpinfo.raw = 0; 4377 } 4378 4379 return bpinfo; 4380 } 4381 4382 uint64_t 4383 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4384 { 4385 return ctrlr->pmr_size; 4386 } 4387 4388 uint32_t 4389 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4390 { 4391 return 
ctrlr->cdata.nn; 4392 } 4393 4394 bool 4395 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4396 { 4397 struct spdk_nvme_ns tmp, *ns; 4398 4399 tmp.id = nsid; 4400 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4401 4402 if (ns != NULL) { 4403 return ns->active; 4404 } 4405 4406 return false; 4407 } 4408 4409 uint32_t 4410 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4411 { 4412 struct spdk_nvme_ns *ns; 4413 4414 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4415 if (ns == NULL) { 4416 return 0; 4417 } 4418 4419 while (ns != NULL) { 4420 if (ns->active) { 4421 return ns->id; 4422 } 4423 4424 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4425 } 4426 4427 return 0; 4428 } 4429 4430 uint32_t 4431 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4432 { 4433 struct spdk_nvme_ns tmp, *ns; 4434 4435 tmp.id = prev_nsid; 4436 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4437 if (ns == NULL) { 4438 return 0; 4439 } 4440 4441 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4442 while (ns != NULL) { 4443 if (ns->active) { 4444 return ns->id; 4445 } 4446 4447 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4448 } 4449 4450 return 0; 4451 } 4452 4453 struct spdk_nvme_ns * 4454 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4455 { 4456 struct spdk_nvme_ns tmp; 4457 struct spdk_nvme_ns *ns; 4458 4459 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4460 return NULL; 4461 } 4462 4463 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4464 4465 tmp.id = nsid; 4466 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4467 4468 if (ns == NULL) { 4469 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4470 if (ns == NULL) { 4471 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4472 return NULL; 4473 } 4474 4475 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4476 ns->id = nsid; 4477 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4478 } 4479 4480 
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4481 4482 return ns; 4483 } 4484 4485 struct spdk_pci_device * 4486 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4487 { 4488 if (ctrlr == NULL) { 4489 return NULL; 4490 } 4491 4492 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4493 return NULL; 4494 } 4495 4496 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4497 } 4498 4499 uint32_t 4500 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4501 { 4502 return ctrlr->max_xfer_size; 4503 } 4504 4505 void 4506 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4507 spdk_nvme_aer_cb aer_cb_fn, 4508 void *aer_cb_arg) 4509 { 4510 struct spdk_nvme_ctrlr_process *active_proc; 4511 4512 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4513 4514 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4515 if (active_proc) { 4516 active_proc->aer_cb_fn = aer_cb_fn; 4517 active_proc->aer_cb_arg = aer_cb_arg; 4518 } 4519 4520 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4521 } 4522 4523 void 4524 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4525 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4526 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4527 { 4528 struct spdk_nvme_ctrlr_process *active_proc; 4529 4530 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4531 4532 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4533 if (active_proc) { 4534 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4535 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4536 active_proc->timeout_cb_fn = cb_fn; 4537 active_proc->timeout_cb_arg = cb_arg; 4538 } 4539 4540 ctrlr->timeout_enabled = true; 4541 4542 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4543 } 4544 4545 bool 4546 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4547 { 4548 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 
entries */ 4549 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4550 return ctrlr->log_page_supported[log_page]; 4551 } 4552 4553 bool 4554 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4555 { 4556 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4557 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4558 return ctrlr->feature_supported[feature_code]; 4559 } 4560 4561 int 4562 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4563 struct spdk_nvme_ctrlr_list *payload) 4564 { 4565 struct nvme_completion_poll_status *status; 4566 struct spdk_nvme_ns *ns; 4567 int res; 4568 4569 if (nsid == 0) { 4570 return -EINVAL; 4571 } 4572 4573 status = calloc(1, sizeof(*status)); 4574 if (!status) { 4575 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4576 return -ENOMEM; 4577 } 4578 4579 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4580 nvme_completion_poll_cb, status); 4581 if (res) { 4582 free(status); 4583 return res; 4584 } 4585 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4586 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n"); 4587 if (!status->timed_out) { 4588 free(status); 4589 } 4590 return -ENXIO; 4591 } 4592 free(status); 4593 4594 res = nvme_ctrlr_identify_active_ns(ctrlr); 4595 if (res) { 4596 return res; 4597 } 4598 4599 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4600 return nvme_ns_construct(ns, nsid, ctrlr); 4601 } 4602 4603 int 4604 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4605 struct spdk_nvme_ctrlr_list *payload) 4606 { 4607 struct nvme_completion_poll_status *status; 4608 int res; 4609 4610 if (nsid == 0) { 4611 return -EINVAL; 4612 } 4613 4614 status = calloc(1, sizeof(*status)); 4615 if (!status) { 4616 NVME_CTRLR_ERRLOG(ctrlr, "Failed 
to allocate status tracker\n"); 4617 return -ENOMEM; 4618 } 4619 4620 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4621 nvme_completion_poll_cb, status); 4622 if (res) { 4623 free(status); 4624 return res; 4625 } 4626 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4627 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4628 if (!status->timed_out) { 4629 free(status); 4630 } 4631 return -ENXIO; 4632 } 4633 free(status); 4634 4635 return nvme_ctrlr_identify_active_ns(ctrlr); 4636 } 4637 4638 uint32_t 4639 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4640 { 4641 struct nvme_completion_poll_status *status; 4642 int res; 4643 uint32_t nsid; 4644 4645 status = calloc(1, sizeof(*status)); 4646 if (!status) { 4647 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4648 return 0; 4649 } 4650 4651 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4652 if (res) { 4653 free(status); 4654 return 0; 4655 } 4656 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4657 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4658 if (!status->timed_out) { 4659 free(status); 4660 } 4661 return 0; 4662 } 4663 4664 nsid = status->cpl.cdw0; 4665 free(status); 4666 4667 assert(nsid > 0); 4668 4669 /* Return the namespace ID that was created */ 4670 return nsid; 4671 } 4672 4673 int 4674 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4675 { 4676 struct nvme_completion_poll_status *status; 4677 int res; 4678 4679 if (nsid == 0) { 4680 return -EINVAL; 4681 } 4682 4683 status = calloc(1, sizeof(*status)); 4684 if (!status) { 4685 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4686 return -ENOMEM; 4687 } 4688 4689 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4690 if (res) { 4691 free(status); 4692 return res; 4693 } 4694 
if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4695 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4696 if (!status->timed_out) { 4697 free(status); 4698 } 4699 return -ENXIO; 4700 } 4701 free(status); 4702 4703 return nvme_ctrlr_identify_active_ns(ctrlr); 4704 } 4705 4706 int 4707 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4708 struct spdk_nvme_format *format) 4709 { 4710 struct nvme_completion_poll_status *status; 4711 int res; 4712 4713 status = calloc(1, sizeof(*status)); 4714 if (!status) { 4715 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4716 return -ENOMEM; 4717 } 4718 4719 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4720 status); 4721 if (res) { 4722 free(status); 4723 return res; 4724 } 4725 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4726 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4727 if (!status->timed_out) { 4728 free(status); 4729 } 4730 return -ENXIO; 4731 } 4732 free(status); 4733 4734 return spdk_nvme_ctrlr_reset(ctrlr); 4735 } 4736 4737 int 4738 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4739 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4740 { 4741 struct spdk_nvme_fw_commit fw_commit; 4742 struct nvme_completion_poll_status *status; 4743 int res; 4744 unsigned int size_remaining; 4745 unsigned int offset; 4746 unsigned int transfer; 4747 void *p; 4748 4749 if (!completion_status) { 4750 return -EINVAL; 4751 } 4752 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4753 if (size % 4) { 4754 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4755 return -1; 4756 } 4757 4758 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4759 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4760 */ 4761 if 
((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4762 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4763 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4764 return -1; 4765 } 4766 4767 status = calloc(1, sizeof(*status)); 4768 if (!status) { 4769 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4770 return -ENOMEM; 4771 } 4772 4773 /* Firmware download */ 4774 size_remaining = size; 4775 offset = 0; 4776 p = payload; 4777 4778 while (size_remaining > 0) { 4779 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4780 4781 memset(status, 0, sizeof(*status)); 4782 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4783 nvme_completion_poll_cb, 4784 status); 4785 if (res) { 4786 free(status); 4787 return res; 4788 } 4789 4790 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4791 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4792 if (!status->timed_out) { 4793 free(status); 4794 } 4795 return -ENXIO; 4796 } 4797 p += transfer; 4798 offset += transfer; 4799 size_remaining -= transfer; 4800 } 4801 4802 /* Firmware commit */ 4803 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4804 fw_commit.fs = slot; 4805 fw_commit.ca = commit_action; 4806 4807 memset(status, 0, sizeof(*status)); 4808 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4809 status); 4810 if (res) { 4811 free(status); 4812 return res; 4813 } 4814 4815 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4816 4817 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4818 4819 if (!status->timed_out) { 4820 free(status); 4821 } 4822 4823 if (res) { 4824 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4825 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4826 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 
4827 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4828 NVME_CTRLR_NOTICELOG(ctrlr, 4829 "firmware activation requires conventional reset to be performed. !\n"); 4830 } else { 4831 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4832 } 4833 return -ENXIO; 4834 } 4835 } 4836 4837 return spdk_nvme_ctrlr_reset(ctrlr); 4838 } 4839 4840 int 4841 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4842 { 4843 int rc, size; 4844 union spdk_nvme_cmbsz_register cmbsz; 4845 4846 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4847 4848 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4849 return -ENOTSUP; 4850 } 4851 4852 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4853 4854 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4855 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4856 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4857 4858 if (rc < 0) { 4859 return rc; 4860 } 4861 4862 return size; 4863 } 4864 4865 void * 4866 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4867 { 4868 void *buf; 4869 4870 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4871 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4872 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4873 4874 return buf; 4875 } 4876 4877 void 4878 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4879 { 4880 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4881 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4882 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4883 } 4884 4885 int 4886 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4887 { 4888 int rc; 4889 4890 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4891 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4892 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4893 4894 return rc; 4895 } 4896 4897 int 4898 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4899 { 4900 int rc; 4901 4902 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4903 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4904 
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4905 4906 return rc; 4907 } 4908 4909 void * 4910 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4911 { 4912 void *buf; 4913 4914 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4915 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4916 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4917 4918 return buf; 4919 } 4920 4921 int 4922 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4923 { 4924 int rc; 4925 4926 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4927 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4928 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4929 4930 return rc; 4931 } 4932 4933 int 4934 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4935 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4936 { 4937 union spdk_nvme_bprsel_register bprsel; 4938 union spdk_nvme_bpinfo_register bpinfo; 4939 uint64_t bpmbl, bpmb_size; 4940 4941 if (ctrlr->cap.bits.bps == 0) { 4942 return -ENOTSUP; 4943 } 4944 4945 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4946 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4947 return -EIO; 4948 } 4949 4950 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 4951 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 4952 return -EALREADY; 4953 } 4954 4955 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4956 4957 bpmb_size = bprsz * 4096; 4958 bpmbl = spdk_vtophys(payload, &bpmb_size); 4959 if (bpmbl == SPDK_VTOPHYS_ERROR) { 4960 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 4961 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4962 return -EFAULT; 4963 } 4964 4965 if (bpmb_size != bprsz * 4096) { 4966 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n"); 4967 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4968 return -EFAULT; 4969 } 4970 4971 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 4972 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 4973 
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4974 return -EIO; 4975 } 4976 4977 bprsel.bits.bpid = bpid; 4978 bprsel.bits.bprof = bprof; 4979 bprsel.bits.bprsz = bprsz; 4980 4981 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 4982 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 4983 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4984 return -EIO; 4985 } 4986 4987 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4988 return 0; 4989 } 4990 4991 int 4992 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 4993 { 4994 int rc = 0; 4995 union spdk_nvme_bpinfo_register bpinfo; 4996 4997 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4998 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4999 return -EIO; 5000 } 5001 5002 switch (bpinfo.bits.brs) { 5003 case SPDK_NVME_BRS_NO_READ: 5004 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 5005 rc = -EINVAL; 5006 break; 5007 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5008 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5009 rc = -EAGAIN; 5010 break; 5011 case SPDK_NVME_BRS_READ_ERROR: 5012 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5013 rc = -EIO; 5014 break; 5015 case SPDK_NVME_BRS_READ_SUCCESS: 5016 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5017 break; 5018 default: 5019 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5020 rc = -EINVAL; 5021 } 5022 5023 return rc; 5024 } 5025 5026 static void 5027 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5028 { 5029 int res; 5030 struct spdk_nvme_ctrlr *ctrlr = arg; 5031 struct spdk_nvme_fw_commit fw_commit; 5032 struct spdk_nvme_cpl err_cpl = 5033 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5034 5035 if (spdk_nvme_cpl_is_error(cpl)) { 5036 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5037 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5038 return; 5039 } 5040 5041 if (ctrlr->bp_ws == 
SPDK_NVME_BP_WS_DOWNLOADING) { 5042 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5043 ctrlr->fw_payload += ctrlr->fw_transfer_size; 5044 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5045 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5046 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5047 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5048 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5049 if (res) { 5050 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5051 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5052 return; 5053 } 5054 5055 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5056 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5057 } 5058 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5059 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5060 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5061 fw_commit.bpid = ctrlr->bpid; 5062 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5063 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5064 nvme_write_boot_partition_cb, ctrlr); 5065 if (res) { 5066 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5067 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5068 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5069 return; 5070 } 5071 5072 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5073 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5074 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5075 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5076 fw_commit.bpid = ctrlr->bpid; 5077 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5078 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5079 nvme_write_boot_partition_cb, ctrlr); 5080 if (res) { 5081 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5082 
NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5083 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5084 return; 5085 } 5086 5087 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5088 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5089 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5090 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5091 } else { 5092 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5093 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5094 return; 5095 } 5096 } 5097 5098 int 5099 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5100 void *payload, uint32_t size, uint32_t bpid, 5101 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5102 { 5103 int res; 5104 5105 if (ctrlr->cap.bits.bps == 0) { 5106 return -ENOTSUP; 5107 } 5108 5109 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5110 ctrlr->bpid = bpid; 5111 ctrlr->bp_write_cb_fn = cb_fn; 5112 ctrlr->bp_write_cb_arg = cb_arg; 5113 ctrlr->fw_offset = 0; 5114 ctrlr->fw_size_remaining = size; 5115 ctrlr->fw_payload = payload; 5116 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5117 5118 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5119 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5120 5121 return res; 5122 } 5123 5124 bool 5125 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5126 { 5127 assert(ctrlr); 5128 5129 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5130 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5131 } 5132 5133 bool 5134 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5135 { 5136 assert(ctrlr); 5137 5138 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5139 } 5140 5141 int 5142 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5143 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5144 { 5145 struct nvme_completion_poll_status *status; 5146 int res; 5147 
5148 status = calloc(1, sizeof(*status)); 5149 if (!status) { 5150 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5151 return -ENOMEM; 5152 } 5153 5154 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5155 nvme_completion_poll_cb, status); 5156 if (res) { 5157 free(status); 5158 return res; 5159 } 5160 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5161 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5162 if (!status->timed_out) { 5163 free(status); 5164 } 5165 return -ENXIO; 5166 } 5167 free(status); 5168 5169 return 0; 5170 } 5171 5172 int 5173 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5174 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5175 { 5176 struct nvme_completion_poll_status *status; 5177 int res; 5178 5179 status = calloc(1, sizeof(*status)); 5180 if (!status) { 5181 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5182 return -ENOMEM; 5183 } 5184 5185 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5186 nvme_completion_poll_cb, 5187 status); 5188 if (res) { 5189 free(status); 5190 return res; 5191 } 5192 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5193 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5194 if (!status->timed_out) { 5195 free(status); 5196 } 5197 return -ENXIO; 5198 } 5199 5200 free(status); 5201 5202 return 0; 5203 } 5204 5205 uint64_t 5206 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5207 { 5208 return ctrlr->flags; 5209 } 5210 5211 const struct spdk_nvme_transport_id * 5212 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5213 { 5214 return &ctrlr->trid; 5215 } 5216 5217 int32_t 5218 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5219 { 5220 uint32_t qid; 5221 5222 assert(ctrlr->free_io_qids); 5223 
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5224 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5225 if (qid > ctrlr->opts.num_io_queues) { 5226 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5227 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5228 return -1; 5229 } 5230 5231 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5232 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5233 return qid; 5234 } 5235 5236 void 5237 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5238 { 5239 assert(qid <= ctrlr->opts.num_io_queues); 5240 5241 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5242 5243 if (spdk_likely(ctrlr->free_io_qids)) { 5244 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5245 } 5246 5247 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5248 } 5249 5250 int 5251 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5252 struct spdk_memory_domain **domains, int array_size) 5253 { 5254 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5255 } 5256