/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvme_internal.h"
#include "nvme_io_msg.h"

#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/endian.h"

struct nvme_active_ns_ctx;

static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
		struct nvme_async_event_request *aer);
static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns);
static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr);
static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
				 uint64_t timeout_in_ms);

static int
nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
{
	if (ns1->id < ns2->id) {
		return -1;
	} else if (ns1->id > ns2->id) {
		return 1;
	} else {
		return 0;
	}
}

RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp);

#define CTRLR_STRING(ctrlr) \
	((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \
	ctrlr->trid.subnqn : ctrlr->trid.traddr)

#define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
	SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
	SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
	SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
	SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#ifdef DEBUG
#define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
	SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
#else
#define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
#endif

#define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \
	nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \
			offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg)

#define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \
	nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \
			offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg)

#define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \
	nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg)

#define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \
	nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg)

#define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \
	nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg)

#define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \
	nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg)

#define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \
	nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg)

static int
nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
					      &cc->raw);
}

static int
nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
					      &csts->raw);
}

int
nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
{
	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
					      &cap->raw);
}

int
nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
					      &vs->raw);
}

int
nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
					      &cmbsz->raw);
}

int
nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
					      &pmrcap->raw);
}

int
nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw),
					      &bpinfo->raw);
}

int
nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel)
{
	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw),
					      bprsel->raw);
}

int
nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value)
{
	return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl),
					      bpmbl_value);
}

static int
nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value)
{
	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr),
					      nssr_value);
}

bool
nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS &&
	       ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS;
}

/* When fields in spdk_nvme_ctrlr_opts are changed and you change this function, please
 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c.
 */
void
spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	char host_id_str[SPDK_UUID_STRING_LEN];

	assert(opts);

	opts->opts_size = opts_size;

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \
		opts->field = value; \
	} \

	SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES);
	SET_FIELD(use_cmb_sqs, false);
	SET_FIELD(no_shn_notification, false);
	SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR);
	SET_FIELD(arbitration_burst, 0);
	SET_FIELD(low_priority_weight, 0);
	SET_FIELD(medium_priority_weight, 0);
	SET_FIELD(high_priority_weight, 0);
	SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS);
	SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT);
	SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE);

	if (nvme_driver_init() == 0) {
		if (FIELD_OK(hostnqn)) {
			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
					    &g_spdk_nvme_driver->default_extended_host_id);
			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
		}

		if (FIELD_OK(extended_host_id)) {
			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
			       sizeof(opts->extended_host_id));
		}

	}

	SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS);

	if (FIELD_OK(src_addr)) {
		memset(opts->src_addr, 0, sizeof(opts->src_addr));
	}

	if (FIELD_OK(src_svcid)) {
		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
	}

	if (FIELD_OK(host_id)) {
		memset(opts->host_id, 0, sizeof(opts->host_id));
	}

	SET_FIELD(command_set, CHAR_BIT);
	SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000);
	SET_FIELD(header_digest, false);
	SET_FIELD(data_digest, false);
	SET_FIELD(disable_error_logging, false);
	SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT);
	SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE);
	SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT);
	SET_FIELD(disable_read_ana_log_page, false);

#undef FIELD_OK
#undef SET_FIELD
}

const struct spdk_nvme_ctrlr_opts *
spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->opts;
}

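/*
 * Example (illustrative sketch, not compiled as part of this file): callers size the
 * options structure themselves so that an application built against an older or newer
 * layout of spdk_nvme_ctrlr_opts still gets defaults for every field it knows about.
 * The connect call shown assumes a populated transport ID and is the usual consumer
 * of these options.
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */
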
/**
 * This function will be called when the process allocates the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
		qpair->active_proc = active_proc;
	}
}

/**
 * This function will be called when the process frees the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_qpair *active_qpair, *tmp_qpair;

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (!active_proc) {
		return;
	}

	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
			   per_process_tailq, tmp_qpair) {
		if (active_qpair == qpair) {
			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
				     active_qpair, per_process_tailq);

			break;
		}
	}
}

void
spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_io_qpair_opts *opts,
		size_t opts_size)
{
	assert(ctrlr);

	assert(opts);

	memset(opts, 0, opts_size);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size

	if (FIELD_OK(qprio)) {
		opts->qprio = SPDK_NVME_QPRIO_URGENT;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = ctrlr->opts.io_queue_size;
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
	}

	if (FIELD_OK(delay_cmd_submit)) {
		opts->delay_cmd_submit = false;
	}

	if (FIELD_OK(sq.vaddr)) {
		opts->sq.vaddr = NULL;
	}

	if (FIELD_OK(sq.paddr)) {
		opts->sq.paddr = 0;
	}

	if (FIELD_OK(sq.buffer_size)) {
		opts->sq.buffer_size = 0;
	}

	if (FIELD_OK(cq.vaddr)) {
		opts->cq.vaddr = NULL;
	}

	if (FIELD_OK(cq.paddr)) {
		opts->cq.paddr = 0;
	}

	if (FIELD_OK(cq.buffer_size)) {
		opts->cq.buffer_size = 0;
	}

	if (FIELD_OK(create_only)) {
		opts->create_only = false;
	}

	if (FIELD_OK(async_mode)) {
		opts->async_mode = false;
	}

#undef FIELD_OK
}

static struct spdk_nvme_qpair *
nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			   const struct spdk_nvme_io_qpair_opts *opts)
{
	int32_t qid;
	struct spdk_nvme_qpair *qpair;
	union spdk_nvme_cc_register cc;

	if (!ctrlr) {
		return NULL;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	cc.raw = ctrlr->process_init_cc.raw;

	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	/*
	 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
	 * default round robin arbitration method.
	 */
	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
		NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
	if (qid < 0) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
	if (qpair == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n");
		spdk_nvme_ctrlr_free_qid(ctrlr, qid);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

	nvme_ctrlr_proc_add_io_qpair(qpair);

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return qpair;
}

int
spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc;

	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
		return -EISCONN;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
		spdk_delay_us(100);
	}

	return rc;
}

void
spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

struct spdk_nvme_qpair *
spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			       const struct spdk_nvme_io_qpair_opts *user_opts,
			       size_t opts_size)
{

	struct spdk_nvme_qpair *qpair;
	struct spdk_nvme_io_qpair_opts opts;
	int rc;

	if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) {
		/* When the controller is resetting or initializing, free_io_qids is freed or
		 * not yet created. We can't create an I/O qpair in that case. */
		return NULL;
	}

	/*
	 * Get the default options, then overwrite them with the user-provided options
	 * up to opts_size.
	 *
	 * This allows for extensions of the opts structure without breaking
	 * ABI compatibility.
	 */
	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	if (user_opts) {
		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));

		/* If user passes buffers, make sure they're big enough for the requested queue size */
		if (opts.sq.vaddr) {
			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
				NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n",
						  opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
				return NULL;
			}
		}
		if (opts.cq.vaddr) {
			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
				NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n",
						  opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
				return NULL;
			}
		}
	}

	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);

	if (qpair == NULL || opts.create_only == true) {
		return qpair;
	}

	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n");
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_proc_remove_io_qpair(qpair);
		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
		spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	return qpair;
}

int
spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;
	enum nvme_qpair_state qpair_state;
	int rc;

	assert(qpair != NULL);
	assert(nvme_qpair_is_admin_queue(qpair) == false);
	assert(qpair->ctrlr != NULL);

	ctrlr = qpair->ctrlr;
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	qpair_state = nvme_qpair_get_state(qpair);

	if (ctrlr->is_removed) {
		rc = -ENODEV;
		goto out;
	}

	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
		rc = -EAGAIN;
		goto out;
	}

	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
		rc = -ENXIO;
		goto out;
	}

	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
		rc = 0;
		goto out;
	}

	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
	if (rc) {
		rc = -EAGAIN;
		goto out;
	}

out:
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return rc;
}

spdk_nvme_qp_failure_reason
spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->adminq->transport_failure_reason;
}

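/*
 * Example (illustrative sketch): the two-step create/connect flow enabled by
 * opts.create_only, using an already attached ctrlr. When create_only is left false,
 * spdk_nvme_ctrlr_alloc_io_qpair() performs the connect itself.
 *
 *	struct spdk_nvme_io_qpair_opts opts;
 *	struct spdk_nvme_qpair *qpair;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
 *	opts.create_only = true;
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
 *	if (qpair != NULL && spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair) != 0) {
 *		spdk_nvme_ctrlr_free_io_qpair(qpair);
 *	}
 */
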
/*
 * This internal function will attempt to take the controller
 * lock before calling disconnect on a controller qpair.
 * Functions already holding the controller lock should
 * call nvme_transport_ctrlr_disconnect_qpair directly.
 */
void
nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	assert(ctrlr != NULL);
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;

	if (qpair == NULL) {
		return 0;
	}

	ctrlr = qpair->ctrlr;

	if (qpair->in_completion_context) {
		/*
		 * There are many cases where it is convenient to delete an io qpair in the context
		 * of that qpair's completion routine. To handle this properly, set a flag here
		 * so that the completion routine will perform an actual delete after the context
		 * unwinds.
		 */
		qpair->delete_after_completion_context = 1;
		return 0;
	}

	if (qpair->poll_group && qpair->poll_group->in_completion_context) {
		/* Same as above, but in a poll group. */
		qpair->poll_group->num_qpairs_to_delete++;
		qpair->delete_after_completion_context = 1;
		return 0;
	}

	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);

	if (qpair->poll_group) {
		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
	}

	/* Do not retry. */
	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);

	/* In the multi-process case, a process may call this function on a foreign
	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
	 * exits unexpectedly. In that case, we must not try to abort any reqs associated
	 * with that qpair, since the callbacks will also be foreign to this process.
	 */
	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
		nvme_qpair_abort_all_queued_reqs(qpair, 0);
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_proc_remove_io_qpair(qpair);

	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
	spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id);

	nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}

static void
nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_intel_log_page_directory *log_page_directory)
{
	if (log_page_directory == NULL) {
		return;
	}

	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
		return;
	}

	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;

	if (log_page_directory->read_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
	}
	if (log_page_directory->write_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
	}
	if (log_page_directory->temperature_statistics_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
	}
	if (log_page_directory->smart_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
	}
	if (log_page_directory->marketing_description_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
	}
}

struct intel_log_pages_ctx {
	struct spdk_nvme_intel_log_page_directory log_page_directory;
	struct spdk_nvme_ctrlr *ctrlr;
};

static void
nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct intel_log_pages_ctx *ctx = arg;
	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;

	if (!spdk_nvme_cpl_is_error(cpl)) {
		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory);
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
			     ctrlr->opts.admin_timeout_ms);
	free(ctx);
}

static int
nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	struct intel_log_pages_ctx *ctx;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	ctx->ctrlr = ctrlr;

	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
					      SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory,
					      sizeof(struct spdk_nvme_intel_log_page_directory),
					      0, nvme_ctrlr_set_intel_support_log_pages_done, ctx);
	if (rc != 0) {
		free(ctx);
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES,
			     ctrlr->opts.admin_timeout_ms);

	return 0;
}

static int
nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t ana_log_page_size;

	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid *
			    sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count *
			    sizeof(uint32_t);

	/* Number of active namespaces may have changed.
	 * Check if ANA log page fits into existing buffer.
	 */
	if (ana_log_page_size > ctrlr->ana_log_page_size) {
		void *new_buffer;

		if (ctrlr->ana_log_page) {
			new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size);
		} else {
			new_buffer = calloc(1, ana_log_page_size);
		}

		if (!new_buffer) {
			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n",
					  ana_log_page_size);
			return -ENXIO;
		}

		ctrlr->ana_log_page = new_buffer;
		if (ctrlr->copied_ana_desc) {
			new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size);
		} else {
			new_buffer = calloc(1, ana_log_page_size);
		}

		if (!new_buffer) {
			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n",
					  ana_log_page_size);
			return -ENOMEM;
		}

		ctrlr->copied_ana_desc = new_buffer;
		ctrlr->ana_log_page_size = ana_log_page_size;
	}

	return 0;
}

static int
nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status *status;
	int rc;

	rc = nvme_ctrlr_alloc_ana_log_page(ctrlr);
	if (rc != 0) {
		return rc;
	}

	status = calloc(1, sizeof(*status));
	if (status == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
					      SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page,
					      ctrlr->ana_log_page_size, 0,
					      nvme_completion_poll_cb, status);
	if (rc != 0) {
		free(status);
		return rc;
	}

	if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock,
			ctrlr->opts.admin_timeout_ms * 1000)) {
		if (!status->timed_out) {
			free(status);
		}
		return -EIO;
	}

	free(status);
	return 0;
}

static int
nvme_ctrlr_init_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	rc = nvme_ctrlr_alloc_ana_log_page(ctrlr);
	if (rc) {
		return rc;
	}

	return nvme_ctrlr_update_ana_log_page(ctrlr);
}

static int
nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
				void *cb_arg)
{
	struct spdk_nvme_ctrlr *ctrlr = cb_arg;
	struct spdk_nvme_ns *ns;
	uint32_t i, nsid;

	for (i = 0; i < desc->num_of_nsid; i++) {
		nsid = desc->nsid[i];
		if (nsid == 0 || nsid > ctrlr->cdata.nn) {
			continue;
		}

		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		assert(ns != NULL);

		ns->ana_group_id = desc->ana_group_id;
		ns->ana_state = desc->ana_state;
	}

	return 0;
}

int
nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr,
			      spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ana_group_descriptor *copied_desc;
	uint8_t *orig_desc;
	uint32_t i, desc_size, copy_len;
	int rc = 0;

	if (ctrlr->ana_log_page == NULL) {
		return -EINVAL;
	}

	copied_desc = ctrlr->copied_ana_desc;

	orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
	copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);

	for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) {
		memcpy(copied_desc, orig_desc, copy_len);

		rc = cb_fn(copied_desc, cb_arg);
		if (rc != 0) {
			break;
		}

		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
			    copied_desc->num_of_nsid * sizeof(uint32_t);
		orig_desc += desc_size;
		copy_len -= desc_size;
	}

	return rc;
}

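/*
 * Example (illustrative sketch): a minimal nvme_ctrlr_parse_ana_log_page() callback in
 * the same shape as nvme_ctrlr_update_ns_ana_states() above, here just summing the
 * namespace count across all ANA group descriptors into a caller-provided counter.
 *
 *	static int
 *	count_ana_nsids(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
 *	{
 *		uint32_t *total = cb_arg;
 *
 *		*total += desc->num_of_nsid;
 *		return 0;
 *	}
 */
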
static int
nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;

	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
	/* Mandatory pages */
	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
	if (ctrlr->cdata.lpa.celp) {
		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
	}

	if (ctrlr->cdata.cmic.ana_reporting) {
		ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true;
		if (!ctrlr->opts.disable_read_ana_log_page) {
			rc = nvme_ctrlr_init_ana_log_page(ctrlr);
			if (rc == 0) {
				nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
							      ctrlr);
			}
		}
	}

	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES,
				     ctrlr->opts.admin_timeout_ms);

	} else {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
				     ctrlr->opts.admin_timeout_ms);

	}

	return rc;
}

static void
nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

static void
nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t cdw11;
	struct nvme_completion_poll_status *status;

	if (ctrlr->opts.arbitration_burst == 0) {
		return;
	}

	if (ctrlr->opts.arbitration_burst > 7) {
		NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values are 0-7\n");
		return;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return;
	}

	cdw11 = ctrlr->opts.arbitration_burst;

	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
	}

	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
					    cdw11, 0, NULL, 0,
					    nvme_completion_poll_cb, status) < 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n");
		free(status);
		return;
	}

	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
					     ctrlr->opts.admin_timeout_ms * 1000)) {
		NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n");
	}

	if (!status->timed_out) {
		free(status);
	}
}

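/*
 * Worked example (illustrative, values chosen arbitrarily): with arbitration_burst = 3,
 * low/medium/high priority weights of 1/2/4 and WRR supported, the cdw11 built above is
 * 3 | (1 << 8) | (2 << 16) | (4 << 24) = 0x04020103, i.e. the burst in byte 0 and the
 * three weights in bytes 1-3 of the Arbitration feature value.
 */
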
static void
nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
	/* Mandatory features */
	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
	/* Optional features */
	if (ctrlr->cdata.vwc.present) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	}
	if (ctrlr->cdata.apsta.supported) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
	}
	if (ctrlr->cdata.hmpre) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_supported_features(ctrlr);
	}

	nvme_ctrlr_set_arbitration_feature(ctrlr);
}

bool
spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->is_failed;
}

void
nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
{
	/*
	 * Set the flag here and leave failing the qpairs to
	 * spdk_nvme_qpair_process_completions().
	 */
	if (hot_remove) {
		ctrlr->is_removed = true;
	}

	if (ctrlr->is_failed) {
		NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n");
		return;
	}

	ctrlr->is_failed = true;
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
	NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n");
}

/**
 * This public API function will try to take the controller lock.
 * Any private functions being called from a thread already holding
 * the ctrlr lock should call nvme_ctrlr_fail directly.
 */
void
spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_ctrlr_fail(ctrlr, false);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

static void
nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
		ctx->shutdown_complete = true;
		return;
	}

	if (ctrlr->opts.no_shn_notification) {
		ctx->shutdown_complete = true;
		return;
	}

	/*
	 * The NVMe specification defines RTD3E to be the time from setting
	 * SHN = 1 until the controller sets SHST = 10b.
	 * If the device doesn't report RTD3 entry latency, or if it
	 * reports RTD3 entry latency less than 10 seconds, pick
	 * 10 seconds as a reasonable amount of time to
	 * wait before proceeding.
	 */
	NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
	ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
	ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
	NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms);

	ctx->shutdown_start_tsc = spdk_get_ticks();
	ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
}

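/*
 * Worked example (illustrative): a controller reporting RTD3E = 8,000,000 us yields
 * SPDK_CEIL_DIV(8000000, 1000) = 8000 ms, which spdk_max() then raises to the 10000 ms
 * floor used above; an RTD3E of 25,500,000 us would instead give a 25500 ms timeout.
 */
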
static void
nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
	union spdk_nvme_cc_register cc;
	int rc;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
		ctx->shutdown_complete = true;
		return;
	}

	assert(value <= UINT32_MAX);
	cc.raw = (uint32_t)value;

	if (ctrlr->opts.no_shn_notification) {
		NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n");
		if (cc.bits.en == 0) {
			ctx->shutdown_complete = true;
			return;
		}

		cc.bits.en = 0;
	} else {
		cc.bits.shn = SPDK_NVME_SHN_NORMAL;
	}

	rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
		ctx->shutdown_complete = true;
	}
}

static void
nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
			  struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	if (ctrlr->is_removed) {
		ctx->shutdown_complete = true;
		return;
	}

	ctx->state = NVME_CTRLR_DETACH_SET_CC;
	rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
		ctx->shutdown_complete = true;
	}
}

static void
nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr_detach_ctx *ctx = _ctx;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n");
		ctx->shutdown_complete = true;
		return;
	}

	assert(value <= UINT32_MAX);
	ctx->csts.raw = (uint32_t)value;
	ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE;
}

static int
nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
			       struct nvme_ctrlr_detach_ctx *ctx)
{
	union spdk_nvme_csts_register csts;
	uint32_t ms_waited;

	switch (ctx->state) {
	case NVME_CTRLR_DETACH_SET_CC:
	case NVME_CTRLR_DETACH_GET_CSTS:
		/* We're still waiting for the register operation to complete */
		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
		return -EAGAIN;

	case NVME_CTRLR_DETACH_CHECK_CSTS:
		ctx->state = NVME_CTRLR_DETACH_GET_CSTS;
		if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) {
			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
			return -EIO;
		}
		return -EAGAIN;

	case NVME_CTRLR_DETACH_GET_CSTS_DONE:
		ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
		break;

	default:
		assert(0 && "Should never happen");
		return -EINVAL;
	}

	ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();
	csts.raw = ctx->csts.raw;

	if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited);
		return 0;
	}

	if (ms_waited < ctx->shutdown_timeout_ms) {
		return -EAGAIN;
	}

	NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n",
			  ctx->shutdown_timeout_ms);
	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
	}

	return 0;
}

static inline uint64_t
nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap.bits.to * 500;
}

static void
nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = ctx;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n");
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
		return;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
			     nvme_ctrlr_get_ready_timeout(ctrlr));
}

static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	int rc;

	rc = nvme_transport_ctrlr_enable(ctrlr);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n");
		return rc;
	}

	cc.raw = ctrlr->process_init_cc.raw;
	if (cc.bits.en != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n");
		return -EINVAL;
	}

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/* Page size is 2 ^ (12 + mps). */
	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;

	/*
	 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS.
	 * A controller that does not have any bit set in CAP.CSS is not spec compliant.
	 * Try to support such a controller regardless.
	 */
	if (ctrlr->cap.bits.css == 0) {
		NVME_CTRLR_INFOLOG(ctrlr,
				   "Drive reports no command sets supported. Assuming NVM is supported.\n");
		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
	}

	/*
	 * If the user did not explicitly request a command set, or supplied a value larger than
	 * what can be saved in CC.CSS, use the most reasonable default.
	 */
	if (ctrlr->opts.command_set >= CHAR_BIT) {
		if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS;
		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO;
		} else {
			/* Invalid supported bits detected, falling back to NVM. */
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
		}
	}

	/* Verify that the selected command set is supported by the controller. */
	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n",
				    ctrlr->opts.command_set, ctrlr->cap.bits.css);
		NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n");
		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
	}

	cc.bits.css = ctrlr->opts.command_set;

	switch (ctrlr->opts.arb_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
		break;
	case SPDK_NVME_CC_AMS_WRR:
		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	case SPDK_NVME_CC_AMS_VS:
		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	default:
		return -EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;
	ctrlr->process_init_cc.raw = cc.raw;

	if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) {
		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
		return -EIO;
	}

	return 0;
}

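/*
 * Worked example (illustrative): for the common 4 KiB host page size,
 * spdk_u32log2(4096) - 12 = 0, so CC.MPS is written as 0 (page size = 2^(12 + 0)).
 * Likewise, CC.IOSQES = 6 and CC.IOCQES = 4 in nvme_ctrlr_enable() above encode the
 * 64-byte SQ entry and 16-byte CQ entry sizes as powers of two.
 */
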
static const char *
nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
{
	switch (state) {
	case NVME_CTRLR_STATE_INIT_DELAY:
		return "delay init";
	case NVME_CTRLR_STATE_CONNECT_ADMINQ:
		return "connect adminq";
	case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ:
		return "wait for connect adminq";
	case NVME_CTRLR_STATE_READ_VS:
		return "read vs";
	case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS:
		return "read vs wait for vs";
	case NVME_CTRLR_STATE_READ_CAP:
		return "read cap";
	case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP:
		return "read cap wait for cap";
	case NVME_CTRLR_STATE_CHECK_EN:
		return "check en";
	case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC:
		return "check en wait for cc";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		return "disable and wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
		return "disable and wait for CSTS.RDY = 1 reg";
	case NVME_CTRLR_STATE_SET_EN_0:
		return "set CC.EN = 0";
	case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC:
		return "set CC.EN = 0 wait for cc";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		return "disable and wait for CSTS.RDY = 0";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS:
		return "disable and wait for CSTS.RDY = 0 reg";
	case NVME_CTRLR_STATE_ENABLE:
		return "enable controller by writing CC.EN = 1";
	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC:
		return "enable controller by writing CC.EN = 1 reg";
	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		return "wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
		return "wait for CSTS.RDY = 1 reg";
	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
		return "reset admin queue";
	case NVME_CTRLR_STATE_IDENTIFY:
		return "identify controller";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
		return "wait for identify controller";
	case NVME_CTRLR_STATE_CONFIGURE_AER:
		return "configure AER";
	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
		return "wait for configure aer";
	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
		return "set keep alive timeout";
	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
		return "wait for set keep alive timeout";
	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
		return "identify controller iocs specific";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
		return "wait for identify controller iocs specific";
	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
		return "get zns cmd and effects log page";
	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
		return "wait for get zns cmd and effects log page";
	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
		return "set number of queues";
	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
		return "wait for set number of queues";
	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
		return "identify active ns";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
		return "wait for identify active ns";
	case NVME_CTRLR_STATE_IDENTIFY_NS:
		return "identify ns";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
		return "wait for identify ns";
	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
		return "identify namespace id descriptors";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
		return "wait for identify namespace id descriptors";
	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
		return "identify ns iocs specific";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
		return "wait for identify ns iocs specific";
	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
		return "set supported log pages";
	case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES:
		return "set supported INTEL log pages";
	case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES:
		return "wait for supported INTEL log pages";
	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
		return "set supported features";
	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
		return "set doorbell buffer config";
	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
		return "wait for doorbell buffer config";
	case NVME_CTRLR_STATE_SET_HOST_ID:
		return "set host ID";
	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
		return "wait for set host ID";
	case NVME_CTRLR_STATE_READY:
		return "ready";
	case NVME_CTRLR_STATE_ERROR:
		return "error";
	}
	return "unknown";
};

static void
_nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		      uint64_t timeout_in_ms, bool quiet)
{
	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;

	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) {
		if (!quiet) {
			NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n",
					    nvme_ctrlr_state_string(ctrlr->state));
		}
		return;
	}

	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
		goto inf;
	}

	ticks_per_ms = spdk_get_ticks_hz() / 1000;
	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
		NVME_CTRLR_ERRLOG(ctrlr,
				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
		goto inf;
	}

	now_ticks = spdk_get_ticks();
	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
		NVME_CTRLR_ERRLOG(ctrlr,
				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
		goto inf;
	}

	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
	if (!quiet) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n",
				    nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
	}
	return;
inf:
	if (!quiet) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n",
				    nvme_ctrlr_state_string(ctrlr->state));
	}
	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
}

static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		     uint64_t timeout_in_ms)
{
	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false);
}

static void
nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
			   uint64_t timeout_in_ms)
{
	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true);
}

static void
nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr)
{
	spdk_free(ctrlr->cdata_zns);
	ctrlr->cdata_zns = NULL;
}

static void
nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_ctrlr_free_zns_specific_data(ctrlr);
}

static void
nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr->shadow_doorbell) {
		spdk_free(ctrlr->shadow_doorbell);
		ctrlr->shadow_doorbell = NULL;
	}

	if (ctrlr->eventidx) {
		spdk_free(ctrlr->eventidx);
		ctrlr->eventidx = NULL;
	}
}

static void
nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n");
	} else {
		NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n");
	}
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
			     ctrlr->opts.admin_timeout_ms);
}

static int
nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	uint64_t prp1, prp2, len;

	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	/* only 1 page size for doorbell buffer */
	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
					      NULL, SPDK_ENV_LCORE_ID_ANY,
					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
	if (ctrlr->shadow_doorbell == NULL) {
		rc = -ENOMEM;
		goto error;
	}

	len = ctrlr->page_size;
	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
		rc = -EFAULT;
		goto error;
	}

	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
				       NULL, SPDK_ENV_LCORE_ID_ANY,
				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
	if (ctrlr->eventidx == NULL) {
		rc = -ENOMEM;
		goto error;
	}

	len = ctrlr->page_size;
	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
		rc = -EFAULT;
		goto error;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
			     ctrlr->opts.admin_timeout_ms);

	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
	if (rc != 0) {
		goto error;
	}

	return 0;

error:
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
	nvme_ctrlr_free_doorbell_buffer(ctrlr);
	return rc;
}

static void
nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_request *req, *tmp;
	struct spdk_nvme_cpl cpl = {};

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);

		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
		nvme_free_request(req);
	}
}

int
spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_qpair *qpair;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	ctrlr->prepare_for_reset = false;

	if (ctrlr->is_resetting || ctrlr->is_removed) {
		/*
		 * Controller is already resetting or has been removed. Return
		 * immediately since there is no need to kick off another
		 * reset in these cases.
		 */
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return ctrlr->is_resetting ? -EBUSY : -ENXIO;
	}

	ctrlr->is_resetting = true;
	ctrlr->is_failed = false;

	NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n");

	/* Disable keep-alive, it'll be re-enabled as part of the init process */
	ctrlr->keep_alive_interval_ticks = 0;

	/* Abort all of the queued abort requests */
	nvme_ctrlr_abort_queued_aborts(ctrlr);

	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);

	/* Disable all queues before disabling the controller hardware. */
	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
	}

	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);

	/* Doorbell buffer config is invalid during reset */
	nvme_ctrlr_free_doorbell_buffer(ctrlr);

	/* I/O Command Set Specific Identify Controller data is invalidated during reset */
	nvme_ctrlr_free_iocs_specific_data(ctrlr);

	spdk_bit_array_free(&ctrlr->free_io_qids);

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}

void
spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	/* Return without releasing ctrlr_lock. ctrlr_lock will be released when
	 * spdk_nvme_ctrlr_reset_poll_async() returns 0.
	 */
}

static int
nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	rc = spdk_nvme_ctrlr_disconnect(ctrlr);
	if (rc != 0) {
		return rc;
	}

	spdk_nvme_ctrlr_reconnect_async(ctrlr);
	return 0;
}

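/*
 * Example (illustrative sketch): the asynchronous reset flow built from the reset APIs
 * in this file. spdk_nvme_ctrlr_reset_poll_async() frees the context once it stops
 * returning -EAGAIN, so the caller must not reuse reset_ctx afterwards.
 *
 *	struct spdk_nvme_ctrlr_reset_ctx *reset_ctx;
 *	int rc;
 *
 *	rc = spdk_nvme_ctrlr_reset_async(ctrlr, &reset_ctx);
 *	if (rc == 0) {
 *		do {
 *			rc = spdk_nvme_ctrlr_reset_poll_async(reset_ctx);
 *		} while (rc == -EAGAIN);
 *	}
 */
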
/**
 * This function will be called when the controller is being reinitialized.
 * Note: the ctrlr_lock must be held when calling this function.
 */
int
spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_ns *ns, *tmp_ns;
	struct spdk_nvme_qpair *qpair;
	int rc = 0, rc_tmp = 0;
	bool async;

	if (nvme_ctrlr_process_init(ctrlr) != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n");
		rc = -1;
	}
	if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) {
		return -EAGAIN;
	}

	/*
	 * For non-fabrics controllers, the memory locations of the transport qpair
	 * don't change when the controller is reset. They simply need to be
	 * re-enabled with admin commands to the controller. For fabric
	 * controllers we need to disconnect and reconnect the qpair on its
	 * own thread outside of the context of the reset.
	 */
	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);

			/* Force a synchronous connect. We can't currently handle an asynchronous
			 * operation here. */
			async = qpair->async;
			qpair->async = false;
			rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
			qpair->async = async;

			if (rc_tmp != 0) {
				rc = rc_tmp;
				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
				continue;
			}
		}
	}

	/*
	 * Take this opportunity to remove inactive namespaces. During a reset, namespace
	 * handles can be invalidated.
	 */
	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
		if (!ns->active) {
			RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns);
			spdk_free(ns);
		}
	}

	if (rc) {
		nvme_ctrlr_fail(ctrlr, false);
	}
	ctrlr->is_resetting = false;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
		/*
		 * If the controller doesn't support ns_attribute_notices and
		 * namespace attributes change (e.g. the number of namespaces),
		 * we need to update the system as part of handling the device reset.
		 */
1781 */ 1782 nvme_io_msg_ctrlr_update(ctrlr); 1783 } 1784 1785 return rc; 1786 } 1787 1788 static void 1789 nvme_ctrlr_reset_ctx_init(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx, 1790 struct spdk_nvme_ctrlr *ctrlr) 1791 { 1792 ctrlr_reset_ctx->ctrlr = ctrlr; 1793 } 1794 1795 static int 1796 nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx) 1797 { 1798 struct spdk_nvme_ctrlr *ctrlr = ctrlr_reset_ctx->ctrlr; 1799 1800 return spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); 1801 } 1802 1803 int 1804 spdk_nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx) 1805 { 1806 int rc; 1807 if (!ctrlr_reset_ctx) { 1808 return -EINVAL; 1809 } 1810 rc = nvme_ctrlr_reset_poll_async(ctrlr_reset_ctx); 1811 if (rc == -EAGAIN) { 1812 return rc; 1813 } 1814 1815 free(ctrlr_reset_ctx); 1816 return rc; 1817 } 1818 1819 int 1820 spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr, 1821 struct spdk_nvme_ctrlr_reset_ctx **reset_ctx) 1822 { 1823 struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx; 1824 int rc; 1825 1826 ctrlr_reset_ctx = calloc(1, sizeof(*ctrlr_reset_ctx)); 1827 if (!ctrlr_reset_ctx) { 1828 return -ENOMEM; 1829 } 1830 1831 rc = nvme_ctrlr_reset_pre(ctrlr); 1832 if (rc != 0) { 1833 free(ctrlr_reset_ctx); 1834 } else { 1835 nvme_ctrlr_reset_ctx_init(ctrlr_reset_ctx, ctrlr); 1836 *reset_ctx = ctrlr_reset_ctx; 1837 } 1838 1839 return rc; 1840 } 1841 1842 int 1843 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) 1844 { 1845 struct spdk_nvme_ctrlr_reset_ctx reset_ctx = {}; 1846 int rc; 1847 1848 rc = nvme_ctrlr_reset_pre(ctrlr); 1849 if (rc != 0) { 1850 if (rc == -EBUSY) { 1851 rc = 0; 1852 } 1853 return rc; 1854 } 1855 nvme_ctrlr_reset_ctx_init(&reset_ctx, ctrlr); 1856 1857 while (true) { 1858 rc = nvme_ctrlr_reset_poll_async(&reset_ctx); 1859 if (rc != -EAGAIN) { 1860 break; 1861 } 1862 } 1863 1864 return rc; 1865 } 1866 1867 void 1868 spdk_nvme_ctrlr_prepare_for_reset(struct spdk_nvme_ctrlr *ctrlr) 1869 { 1870 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1871 ctrlr->prepare_for_reset = true; 1872 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1873 } 1874 1875 int 1876 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr) 1877 { 1878 union spdk_nvme_cap_register cap; 1879 int rc = 0; 1880 1881 cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 1882 if (cap.bits.nssrs == 0) { 1883 NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n"); 1884 return -ENOTSUP; 1885 } 1886 1887 NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n"); 1888 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1889 ctrlr->is_resetting = true; 1890 rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE); 1891 ctrlr->is_resetting = false; 1892 1893 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1894 /* 1895 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause 1896 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup. 
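* (SPDK_NVME_NSSR_VALUE written above is 0x4E564D65, i.e. the ASCII string "NVMe",
* which is the value the NSSR register requires in order to trigger the reset.)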
1897 */ 1898 return rc; 1899 } 1900 1901 int 1902 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid) 1903 { 1904 int rc = 0; 1905 1906 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1907 1908 if (ctrlr->is_failed == false) { 1909 rc = -EPERM; 1910 goto out; 1911 } 1912 1913 if (trid->trtype != ctrlr->trid.trtype) { 1914 rc = -EINVAL; 1915 goto out; 1916 } 1917 1918 if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1919 rc = -EINVAL; 1920 goto out; 1921 } 1922 1923 ctrlr->trid = *trid; 1924 1925 out: 1926 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1927 return rc; 1928 } 1929 1930 void 1931 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr, 1932 spdk_nvme_remove_cb remove_cb, void *remove_ctx) 1933 { 1934 if (!spdk_process_is_primary()) { 1935 return; 1936 } 1937 1938 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 1939 ctrlr->remove_cb = remove_cb; 1940 ctrlr->cb_ctx = remove_ctx; 1941 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 1942 } 1943 1944 static void 1945 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) 1946 { 1947 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 1948 1949 if (spdk_nvme_cpl_is_error(cpl)) { 1950 NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n"); 1951 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1952 return; 1953 } 1954 1955 /* 1956 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the 1957 * controller supports. 1958 */ 1959 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); 1960 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); 1961 if (ctrlr->cdata.mdts > 0) { 1962 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, 1963 ctrlr->min_page_size * (1 << ctrlr->cdata.mdts)); 1964 NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); 1965 } 1966 1967 NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); 1968 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1969 ctrlr->cntlid = ctrlr->cdata.cntlid; 1970 } else { 1971 /* 1972 * Fabrics controllers should already have CNTLID from the Connect command. 1973 * 1974 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, 1975 * trust the one from Connect. 
1976 */ 1977 if (ctrlr->cntlid != ctrlr->cdata.cntlid) { 1978 NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", 1979 ctrlr->cdata.cntlid, ctrlr->cntlid); 1980 } 1981 } 1982 1983 if (ctrlr->cdata.sgls.supported) { 1984 assert(ctrlr->cdata.sgls.supported != 0x3); 1985 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; 1986 if (ctrlr->cdata.sgls.supported == 0x2) { 1987 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT; 1988 } 1989 1990 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); 1991 NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges); 1992 } 1993 1994 if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) { 1995 ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED; 1996 } 1997 1998 if (ctrlr->cdata.oacs.directives) { 1999 ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED; 2000 } 2001 2002 NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n", 2003 ctrlr->cdata.fuses.compare_and_write); 2004 if (ctrlr->cdata.fuses.compare_and_write) { 2005 ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED; 2006 } 2007 2008 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 2009 ctrlr->opts.admin_timeout_ms); 2010 } 2011 2012 static int 2013 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) 2014 { 2015 int rc; 2016 2017 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, 2018 ctrlr->opts.admin_timeout_ms); 2019 2020 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, 2021 &ctrlr->cdata, sizeof(ctrlr->cdata), 2022 nvme_ctrlr_identify_done, ctrlr); 2023 if (rc != 0) { 2024 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2025 return rc; 2026 } 2027 2028 return 0; 2029 } 2030 2031 static void 2032 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl) 2033 { 2034 struct spdk_nvme_cmds_and_effect_log_page *log_page; 2035 struct spdk_nvme_ctrlr *ctrlr = arg; 2036 2037 if (spdk_nvme_cpl_is_error(cpl)) { 2038 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n"); 2039 spdk_free(ctrlr->tmp_ptr); 2040 ctrlr->tmp_ptr = NULL; 2041 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2042 return; 2043 } 2044 2045 log_page = ctrlr->tmp_ptr; 2046 2047 if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) { 2048 ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; 2049 } 2050 spdk_free(ctrlr->tmp_ptr); 2051 ctrlr->tmp_ptr = NULL; 2052 2053 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms); 2054 } 2055 2056 static int 2057 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr) 2058 { 2059 int rc; 2060 2061 assert(!ctrlr->tmp_ptr); 2062 ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL, 2063 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2064 if (!ctrlr->tmp_ptr) { 2065 rc = -ENOMEM; 2066 goto error; 2067 } 2068 2069 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG, 2070 ctrlr->opts.admin_timeout_ms); 2071 2072 rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, 2073 0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page), 2074 0, 0, 0, SPDK_NVME_CSI_ZNS << 24, 2075 nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr); 2076 if (rc != 0) { 2077 goto error; 2078 } 2079 2080 return 0; 2081 2082 error: 2083 nvme_ctrlr_set_state(ctrlr, 
NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2084 spdk_free(ctrlr->tmp_ptr);
2085 ctrlr->tmp_ptr = NULL;
2086 return rc;
2087 }
2088 
2089 static void
2090 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl)
2091 {
2092 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2093 
2094 if (spdk_nvme_cpl_is_error(cpl)) {
2095 /* no need to print an error, the controller simply does not support ZNS */
2096 nvme_ctrlr_free_zns_specific_data(ctrlr);
2097 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2098 ctrlr->opts.admin_timeout_ms);
2099 return;
2100 }
2101 
2102 /* A zero zasl value means use mdts */
2103 if (ctrlr->cdata_zns->zasl) {
2104 uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl);
2105 ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append);
2106 } else {
2107 ctrlr->max_zone_append_size = ctrlr->max_xfer_size;
2108 }
2109 
2110 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG,
2111 ctrlr->opts.admin_timeout_ms);
2112 }
2113 
2114 /**
2115 * This function will try to fetch the I/O Command Specific Controller data structure for
2116 * each I/O Command Set supported by SPDK.
2117 *
2118 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command"
2119 * will be returned. Since we are fetching in an exploratory way, getting an error back
2120 * from the controller should not be treated as fatal.
2121 *
2122 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set).
2123 *
2124 * I/O Command Sets without an IOCS specific data structure (i.e. a zero-filled IOCS specific
2125 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set).
2126 */
2127 static int
2128 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2129 {
2130 int rc;
2131 
2132 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2133 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2134 ctrlr->opts.admin_timeout_ms);
2135 return 0;
2136 }
2137 
2138 /*
2139 * Since SPDK currently only needs to fetch a single Command Set, keep the code here,
2140 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates,
2141 * which would require additional functions and complexity for no good reason.
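* (Today that single Command Set is ZNS; note the SPDK_NVME_CSI_ZNS value passed
* to nvme_ctrlr_cmd_identify() just below.)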
2142 */ 2143 assert(!ctrlr->cdata_zns); 2144 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2145 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 2146 if (!ctrlr->cdata_zns) { 2147 rc = -ENOMEM; 2148 goto error; 2149 } 2150 2151 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 2152 ctrlr->opts.admin_timeout_ms); 2153 2154 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 2155 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 2156 nvme_ctrlr_identify_zns_specific_done, ctrlr); 2157 if (rc != 0) { 2158 goto error; 2159 } 2160 2161 return 0; 2162 2163 error: 2164 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2165 nvme_ctrlr_free_zns_specific_data(ctrlr); 2166 return rc; 2167 } 2168 2169 enum nvme_active_ns_state { 2170 NVME_ACTIVE_NS_STATE_IDLE, 2171 NVME_ACTIVE_NS_STATE_PROCESSING, 2172 NVME_ACTIVE_NS_STATE_DONE, 2173 NVME_ACTIVE_NS_STATE_ERROR 2174 }; 2175 2176 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 2177 2178 struct nvme_active_ns_ctx { 2179 struct spdk_nvme_ctrlr *ctrlr; 2180 uint32_t page_count; 2181 uint32_t next_nsid; 2182 uint32_t *new_ns_list; 2183 nvme_active_ns_ctx_deleter deleter; 2184 2185 enum nvme_active_ns_state state; 2186 }; 2187 2188 static struct nvme_active_ns_ctx * 2189 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 2190 { 2191 struct nvme_active_ns_ctx *ctx; 2192 uint32_t *new_ns_list = NULL; 2193 2194 ctx = calloc(1, sizeof(*ctx)); 2195 if (!ctx) { 2196 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 2197 return NULL; 2198 } 2199 2200 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 2201 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 2202 if (!new_ns_list) { 2203 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 2204 free(ctx); 2205 return NULL; 2206 } 2207 2208 ctx->page_count = 1; 2209 ctx->new_ns_list = new_ns_list; 2210 ctx->ctrlr = ctrlr; 2211 ctx->deleter = deleter; 2212 2213 return ctx; 2214 } 2215 2216 static void 2217 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 2218 { 2219 spdk_free(ctx->new_ns_list); 2220 free(ctx); 2221 } 2222 2223 static int 2224 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2225 { 2226 struct spdk_nvme_ns tmp, *ns; 2227 2228 assert(ctrlr != NULL); 2229 2230 tmp.id = nsid; 2231 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 2232 if (ns == NULL) { 2233 return -EINVAL; 2234 } 2235 2236 nvme_ns_destruct(ns); 2237 ns->active = false; 2238 2239 return 0; 2240 } 2241 2242 static int 2243 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 2244 { 2245 struct spdk_nvme_ns *ns; 2246 2247 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 2248 return -EINVAL; 2249 } 2250 2251 /* Namespaces are constructed on demand, so simply request it. 
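* spdk_nvme_ctrlr_get_ns() is expected to allocate the namespace object and insert
* it into the ctrlr->ns tree on first use, so a NULL return here means the
* allocation failed (hence the -ENOMEM below).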
*/ 2252 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2253 if (ns == NULL) { 2254 return -ENOMEM; 2255 } 2256 2257 ns->active = true; 2258 2259 return 0; 2260 } 2261 2262 static void 2263 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list, 2264 size_t max_entries) 2265 { 2266 uint32_t active_ns_count = 0; 2267 size_t i; 2268 uint32_t nsid; 2269 struct spdk_nvme_ns *ns, *tmp_ns; 2270 int rc; 2271 2272 /* First, remove namespaces that no longer exist */ 2273 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 2274 nsid = new_ns_list[0]; 2275 active_ns_count = 0; 2276 while (nsid != 0) { 2277 if (nsid == ns->id) { 2278 break; 2279 } 2280 2281 nsid = new_ns_list[active_ns_count++]; 2282 } 2283 2284 if (nsid != ns->id) { 2285 /* Did not find this namespace id in the new list. */ 2286 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id); 2287 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 2288 } 2289 } 2290 2291 /* Next, add new namespaces */ 2292 active_ns_count = 0; 2293 for (i = 0; i < max_entries; i++) { 2294 nsid = new_ns_list[active_ns_count]; 2295 2296 if (nsid == 0) { 2297 break; 2298 } 2299 2300 /* If the namespace already exists, this will not construct it a second time. */ 2301 rc = nvme_ctrlr_construct_namespace(ctrlr, nsid); 2302 if (rc != 0) { 2303 /* We can't easily handle a failure here. But just move on. */ 2304 assert(false); 2305 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n"); 2306 continue; 2307 } 2308 2309 active_ns_count++; 2310 } 2311 2312 ctrlr->active_ns_count = active_ns_count; 2313 } 2314 2315 static void 2316 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2317 { 2318 struct nvme_active_ns_ctx *ctx = arg; 2319 uint32_t *new_ns_list = NULL; 2320 2321 if (spdk_nvme_cpl_is_error(cpl)) { 2322 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2323 goto out; 2324 } 2325 2326 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1]; 2327 if (ctx->next_nsid == 0) { 2328 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2329 goto out; 2330 } 2331 2332 ctx->page_count++; 2333 new_ns_list = spdk_realloc(ctx->new_ns_list, 2334 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2335 ctx->ctrlr->page_size); 2336 if (!new_ns_list) { 2337 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2338 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2339 goto out; 2340 } 2341 2342 ctx->new_ns_list = new_ns_list; 2343 nvme_ctrlr_identify_active_ns_async(ctx); 2344 return; 2345 2346 out: 2347 if (ctx->deleter) { 2348 ctx->deleter(ctx); 2349 } 2350 } 2351 2352 static void 2353 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 2354 { 2355 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2356 uint32_t i; 2357 int rc; 2358 2359 if (ctrlr->cdata.nn == 0) { 2360 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2361 goto out; 2362 } 2363 2364 assert(ctx->new_ns_list != NULL); 2365 2366 /* 2367 * If controller doesn't support active ns list CNS 0x02 dummy up 2368 * an active ns list, i.e. all namespaces report as active 2369 */ 2370 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 2371 uint32_t *new_ns_list; 2372 2373 /* 2374 * Active NS list must always end with zero element. 2375 * So, we allocate for cdata.nn+1. 
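* For example, with cdata.nn == 3 the dummy list built below is { 1, 2, 3, 0 }.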
2376 */ 2377 ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1, 2378 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 2379 new_ns_list = spdk_realloc(ctx->new_ns_list, 2380 ctx->page_count * sizeof(struct spdk_nvme_ns_list), 2381 ctx->ctrlr->page_size); 2382 if (!new_ns_list) { 2383 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 2384 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2385 goto out; 2386 } 2387 2388 ctx->new_ns_list = new_ns_list; 2389 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 2390 for (i = 0; i < ctrlr->cdata.nn; i++) { 2391 ctx->new_ns_list[i] = i + 1; 2392 } 2393 2394 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 2395 goto out; 2396 } 2397 2398 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 2399 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 2400 &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list), 2401 nvme_ctrlr_identify_active_ns_async_done, ctx); 2402 if (rc != 0) { 2403 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2404 goto out; 2405 } 2406 2407 return; 2408 2409 out: 2410 if (ctx->deleter) { 2411 ctx->deleter(ctx); 2412 } 2413 } 2414 2415 static void 2416 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 2417 { 2418 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 2419 struct spdk_nvme_ns *ns; 2420 2421 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2422 nvme_active_ns_ctx_destroy(ctx); 2423 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2424 return; 2425 } 2426 2427 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2428 2429 RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) { 2430 nvme_ns_free_iocs_specific_data(ns); 2431 } 2432 2433 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2434 nvme_active_ns_ctx_destroy(ctx); 2435 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms); 2436 } 2437 2438 static void 2439 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2440 { 2441 struct nvme_active_ns_ctx *ctx; 2442 2443 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 2444 if (!ctx) { 2445 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2446 return; 2447 } 2448 2449 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 2450 ctrlr->opts.admin_timeout_ms); 2451 nvme_ctrlr_identify_active_ns_async(ctx); 2452 } 2453 2454 int 2455 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 2456 { 2457 struct nvme_active_ns_ctx *ctx; 2458 int rc; 2459 2460 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 2461 if (!ctx) { 2462 return -ENOMEM; 2463 } 2464 2465 nvme_ctrlr_identify_active_ns_async(ctx); 2466 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 2467 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 2468 if (rc < 0) { 2469 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 2470 break; 2471 } 2472 } 2473 2474 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2475 nvme_active_ns_ctx_destroy(ctx); 2476 return -ENXIO; 2477 } 2478 2479 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2480 nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024); 2481 nvme_active_ns_ctx_destroy(ctx); 2482 2483 return 0; 2484 } 2485 2486 static void 2487 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2488 { 2489 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2490 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2491 uint32_t nsid; 2492 int rc; 2493 2494 if 
(spdk_nvme_cpl_is_error(cpl)) { 2495 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2496 return; 2497 } 2498 2499 nvme_ns_set_identify_data(ns); 2500 2501 /* move on to the next active NS */ 2502 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2503 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2504 if (ns == NULL) { 2505 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2506 ctrlr->opts.admin_timeout_ms); 2507 return; 2508 } 2509 ns->ctrlr = ctrlr; 2510 ns->id = nsid; 2511 2512 rc = nvme_ctrlr_identify_ns_async(ns); 2513 if (rc) { 2514 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2515 } 2516 } 2517 2518 static int 2519 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2520 { 2521 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2522 struct spdk_nvme_ns_data *nsdata; 2523 2524 nsdata = &ns->nsdata; 2525 2526 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2527 ctrlr->opts.admin_timeout_ms); 2528 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2529 nsdata, sizeof(*nsdata), 2530 nvme_ctrlr_identify_ns_async_done, ns); 2531 } 2532 2533 static int 2534 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2535 { 2536 uint32_t nsid; 2537 struct spdk_nvme_ns *ns; 2538 int rc; 2539 2540 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2541 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2542 if (ns == NULL) { 2543 /* No active NS, move on to the next state */ 2544 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2545 ctrlr->opts.admin_timeout_ms); 2546 return 0; 2547 } 2548 2549 ns->ctrlr = ctrlr; 2550 ns->id = nsid; 2551 2552 rc = nvme_ctrlr_identify_ns_async(ns); 2553 if (rc) { 2554 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2555 } 2556 2557 return rc; 2558 } 2559 2560 static int 2561 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2562 { 2563 uint32_t nsid; 2564 struct spdk_nvme_ns *ns; 2565 int rc; 2566 2567 if (!prev_nsid) { 2568 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2569 } else { 2570 /* move on to the next active NS */ 2571 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2572 } 2573 2574 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2575 if (ns == NULL) { 2576 /* No first/next active NS, move on to the next state */ 2577 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2578 ctrlr->opts.admin_timeout_ms); 2579 return 0; 2580 } 2581 2582 /* loop until we find a ns which has (supported) iocs specific data */ 2583 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2584 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2585 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2586 if (ns == NULL) { 2587 /* no namespace with (supported) iocs specific data found */ 2588 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2589 ctrlr->opts.admin_timeout_ms); 2590 return 0; 2591 } 2592 } 2593 2594 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2595 if (rc) { 2596 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2597 } 2598 2599 return rc; 2600 } 2601 2602 static void 2603 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2604 { 2605 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2606 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2607 2608 if (spdk_nvme_cpl_is_error(cpl)) { 2609 nvme_ns_free_zns_specific_data(ns); 2610 
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2611 return; 2612 } 2613 2614 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2615 } 2616 2617 static int 2618 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2619 { 2620 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2621 int rc; 2622 2623 switch (ns->csi) { 2624 case SPDK_NVME_CSI_ZNS: 2625 break; 2626 default: 2627 /* 2628 * This switch must handle all cases for which 2629 * nvme_ns_has_supported_iocs_specific_data() returns true, 2630 * other cases should never happen. 2631 */ 2632 assert(0); 2633 } 2634 2635 assert(!ns->nsdata_zns); 2636 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2637 SPDK_MALLOC_SHARE); 2638 if (!ns->nsdata_zns) { 2639 return -ENOMEM; 2640 } 2641 2642 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2643 ctrlr->opts.admin_timeout_ms); 2644 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2645 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2646 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2647 if (rc) { 2648 nvme_ns_free_zns_specific_data(ns); 2649 } 2650 2651 return rc; 2652 } 2653 2654 static int 2655 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2656 { 2657 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2658 /* Multi IOCS not supported/enabled, move on to the next state */ 2659 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2660 ctrlr->opts.admin_timeout_ms); 2661 return 0; 2662 } 2663 2664 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2665 } 2666 2667 static void 2668 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2669 { 2670 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2671 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2672 uint32_t nsid; 2673 int rc; 2674 2675 if (spdk_nvme_cpl_is_error(cpl)) { 2676 /* 2677 * Many controllers claim to be compatible with NVMe 1.3, however, 2678 * they do not implement NS ID Desc List. Therefore, instead of setting 2679 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2680 * error and move on to the next state. 2681 * 2682 * The proper way is to create a new quirk for controllers that violate 2683 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2684 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2685 * it is too generic and was added in order to handle controllers that 2686 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 
2687 */ 2688 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2689 ctrlr->opts.admin_timeout_ms); 2690 return; 2691 } 2692 2693 nvme_ns_set_id_desc_list_data(ns); 2694 2695 /* move on to the next active NS */ 2696 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2697 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2698 if (ns == NULL) { 2699 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2700 ctrlr->opts.admin_timeout_ms); 2701 return; 2702 } 2703 2704 rc = nvme_ctrlr_identify_id_desc_async(ns); 2705 if (rc) { 2706 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2707 } 2708 } 2709 2710 static int 2711 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2712 { 2713 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2714 2715 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2716 2717 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2718 ctrlr->opts.admin_timeout_ms); 2719 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2720 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2721 nvme_ctrlr_identify_id_desc_async_done, ns); 2722 } 2723 2724 static int 2725 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2726 { 2727 uint32_t nsid; 2728 struct spdk_nvme_ns *ns; 2729 int rc; 2730 2731 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2732 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2733 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2734 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2735 /* NS ID Desc List not supported, move on to the next state */ 2736 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2737 ctrlr->opts.admin_timeout_ms); 2738 return 0; 2739 } 2740 2741 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2742 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2743 if (ns == NULL) { 2744 /* No active NS, move on to the next state */ 2745 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2746 ctrlr->opts.admin_timeout_ms); 2747 return 0; 2748 } 2749 2750 rc = nvme_ctrlr_identify_id_desc_async(ns); 2751 if (rc) { 2752 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2753 } 2754 2755 return rc; 2756 } 2757 2758 static void 2759 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2760 { 2761 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA || 2762 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || 2763 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) { 2764 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2765 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2766 ctrlr->cdata.nvmf_specific.ioccsz); 2767 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2768 assert(0); 2769 } 2770 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2771 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2772 } 2773 } 2774 2775 static void 2776 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2777 { 2778 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2779 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2780 2781 if (spdk_nvme_cpl_is_error(cpl)) { 2782 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2783 ctrlr->opts.num_io_queues = 0; 2784 } else { 2785 /* 2786 * Data in cdw0 is 0-based. 2787 * Lower 16-bits indicate number of submission queues allocated. 
2788 * Upper 16-bits indicate number of completion queues allocated. 2789 */ 2790 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2791 cq_allocated = (cpl->cdw0 >> 16) + 1; 2792 2793 /* 2794 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2795 * submission and completion queues. 2796 */ 2797 min_allocated = spdk_min(sq_allocated, cq_allocated); 2798 2799 /* Set number of queues to be minimum of requested and actually allocated. */ 2800 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2801 } 2802 2803 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2804 if (ctrlr->free_io_qids == NULL) { 2805 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2806 return; 2807 } 2808 2809 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2810 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2811 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2812 } 2813 2814 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2815 ctrlr->opts.admin_timeout_ms); 2816 } 2817 2818 static int 2819 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2820 { 2821 int rc; 2822 2823 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2824 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2825 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2826 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2827 } else if (ctrlr->opts.num_io_queues < 1) { 2828 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2829 ctrlr->opts.num_io_queues = 1; 2830 } 2831 2832 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2833 ctrlr->opts.admin_timeout_ms); 2834 2835 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2836 nvme_ctrlr_set_num_queues_done, ctrlr); 2837 if (rc != 0) { 2838 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2839 return rc; 2840 } 2841 2842 return 0; 2843 } 2844 2845 static void 2846 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2847 { 2848 uint32_t keep_alive_interval_us; 2849 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2850 2851 if (spdk_nvme_cpl_is_error(cpl)) { 2852 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2853 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2854 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2855 } else { 2856 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2857 cpl->status.sc, cpl->status.sct); 2858 ctrlr->opts.keep_alive_timeout_ms = 0; 2859 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2860 return; 2861 } 2862 } else { 2863 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2864 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2865 cpl->cdw0); 2866 } 2867 2868 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2869 } 2870 2871 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2872 ctrlr->keep_alive_interval_ticks = 0; 2873 } else { 2874 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2875 2876 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2877 2878 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2879 UINT64_C(1000000); 2880 2881 /* Schedule the first Keep Alive to be sent as soon as possible. 
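* (For example, keep_alive_timeout_ms == 10000 yields keep_alive_interval_us ==
* 5000000 above, i.e. one keep alive roughly every 5 seconds.)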
*/ 2882 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2883 } 2884 2885 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2886 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2887 } else { 2888 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2889 ctrlr->opts.admin_timeout_ms); 2890 } 2891 } 2892 2893 static int 2894 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2895 { 2896 int rc; 2897 2898 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2899 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 2900 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2901 } else { 2902 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2903 ctrlr->opts.admin_timeout_ms); 2904 } 2905 return 0; 2906 } 2907 2908 /* Note: Discovery controller identify data does not populate KAS according to spec. */ 2909 if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) { 2910 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2911 ctrlr->opts.keep_alive_timeout_ms = 0; 2912 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC, 2913 ctrlr->opts.admin_timeout_ms); 2914 return 0; 2915 } 2916 2917 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2918 ctrlr->opts.admin_timeout_ms); 2919 2920 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2921 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2922 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2923 if (rc != 0) { 2924 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2925 ctrlr->opts.keep_alive_timeout_ms = 0; 2926 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2927 return rc; 2928 } 2929 2930 return 0; 2931 } 2932 2933 static void 2934 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2935 { 2936 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2937 2938 if (spdk_nvme_cpl_is_error(cpl)) { 2939 /* 2940 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2941 * is optional. 2942 */ 2943 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2944 cpl->status.sc, cpl->status.sct); 2945 } else { 2946 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2947 } 2948 2949 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2950 } 2951 2952 static int 2953 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2954 { 2955 uint8_t *host_id; 2956 uint32_t host_id_size; 2957 int rc; 2958 2959 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2960 /* 2961 * NVMe-oF sends the host ID during Connect and doesn't allow 2962 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 
2963 */ 2964 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2965 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2966 return 0; 2967 } 2968 2969 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2970 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2971 host_id = ctrlr->opts.extended_host_id; 2972 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2973 } else { 2974 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2975 host_id = ctrlr->opts.host_id; 2976 host_id_size = sizeof(ctrlr->opts.host_id); 2977 } 2978 2979 /* If the user specified an all-zeroes host identifier, don't send the command. */ 2980 if (spdk_mem_all_zero(host_id, host_id_size)) { 2981 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2982 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2983 return 0; 2984 } 2985 2986 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2987 2988 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2989 ctrlr->opts.admin_timeout_ms); 2990 2991 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2992 if (rc != 0) { 2993 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2994 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2995 return rc; 2996 } 2997 2998 return 0; 2999 } 3000 3001 void 3002 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 3003 { 3004 uint32_t nsid; 3005 struct spdk_nvme_ns *ns; 3006 3007 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 3008 nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 3009 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 3010 nvme_ns_construct(ns, nsid, ctrlr); 3011 } 3012 } 3013 3014 static int 3015 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 3016 { 3017 struct nvme_completion_poll_status *status; 3018 int rc = -ENOMEM; 3019 char *buffer = NULL; 3020 uint32_t nsid; 3021 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 3022 3023 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 3024 if (!buffer) { 3025 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 3026 "changed ns log.\n"); 3027 return rc; 3028 } 3029 3030 status = calloc(1, sizeof(*status)); 3031 if (!status) { 3032 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 3033 goto free_buffer; 3034 } 3035 3036 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 3037 SPDK_NVME_LOG_CHANGED_NS_LIST, 3038 SPDK_NVME_GLOBAL_NS_TAG, 3039 buffer, buf_size, 0, 3040 nvme_completion_poll_cb, status); 3041 3042 if (rc) { 3043 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 3044 free(status); 3045 goto free_buffer; 3046 } 3047 3048 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 3049 ctrlr->opts.admin_timeout_ms * 1000); 3050 if (!status->timed_out) { 3051 free(status); 3052 } 3053 3054 if (rc) { 3055 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 3056 goto free_buffer; 3057 } 3058 3059 /* only check the case of overflow. 
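* Per the NVMe spec, the Changed Namespace List log holds at most 1024 entries;
* when more namespaces than that have changed, the first entry reads 0xffffffff.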
*/ 3060 nsid = from_le32(buffer); 3061 if (nsid == 0xffffffffu) { 3062 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 3063 } 3064 3065 free_buffer: 3066 spdk_dma_free(buffer); 3067 return rc; 3068 } 3069 3070 void 3071 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 3072 const struct spdk_nvme_cpl *cpl) 3073 { 3074 union spdk_nvme_async_event_completion event; 3075 struct spdk_nvme_ctrlr_process *active_proc; 3076 int rc; 3077 3078 event.raw = cpl->cdw0; 3079 3080 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3081 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 3082 nvme_ctrlr_clear_changed_ns_log(ctrlr); 3083 3084 rc = nvme_ctrlr_identify_active_ns(ctrlr); 3085 if (rc) { 3086 return; 3087 } 3088 nvme_ctrlr_update_namespaces(ctrlr); 3089 nvme_io_msg_ctrlr_update(ctrlr); 3090 } 3091 3092 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 3093 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 3094 if (!ctrlr->opts.disable_read_ana_log_page) { 3095 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 3096 if (rc) { 3097 return; 3098 } 3099 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, 3100 ctrlr); 3101 } 3102 } 3103 3104 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3105 if (active_proc && active_proc->aer_cb_fn) { 3106 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 3107 } 3108 } 3109 3110 static void 3111 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 3112 const struct spdk_nvme_cpl *cpl) 3113 { 3114 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 3115 struct spdk_nvme_ctrlr_process *proc; 3116 3117 /* Add async event to each process objects event list */ 3118 TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { 3119 /* Must be shared memory so other processes can access */ 3120 nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3121 if (!nvme_event) { 3122 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 3123 return; 3124 } 3125 nvme_event->cpl = *cpl; 3126 3127 STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link); 3128 } 3129 } 3130 3131 void 3132 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 3133 { 3134 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 3135 struct spdk_nvme_ctrlr_process *active_proc; 3136 3137 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3138 3139 STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) { 3140 STAILQ_REMOVE(&active_proc->async_events, nvme_event, 3141 spdk_nvme_ctrlr_aer_completion_list, link); 3142 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 3143 spdk_free(nvme_event); 3144 3145 } 3146 } 3147 3148 static void 3149 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 3150 { 3151 struct nvme_async_event_request *aer = arg; 3152 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 3153 3154 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 3155 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 3156 /* 3157 * This is simulated when controller is being shut down, to 3158 * effectively abort outstanding asynchronous event requests 3159 * and make sure all memory is freed. Do not repost the 3160 * request in this case. 
3161 */ 3162 return; 3163 } 3164 3165 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 3166 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 3167 /* 3168 * SPDK will only send as many AERs as the device says it supports, 3169 * so this status code indicates an out-of-spec device. Do not repost 3170 * the request in this case. 3171 */ 3172 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 3173 "handling. Do not repost this AER.\n"); 3174 return; 3175 } 3176 3177 /* Add the events to the list */ 3178 nvme_ctrlr_queue_async_event(ctrlr, cpl); 3179 3180 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 3181 if (ctrlr->is_removed || ctrlr->is_destructed) { 3182 return; 3183 } 3184 3185 /* 3186 * Repost another asynchronous event request to replace the one 3187 * that just completed. 3188 */ 3189 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 3190 /* 3191 * We can't do anything to recover from a failure here, 3192 * so just print a warning message and leave the AER unsubmitted. 3193 */ 3194 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 3195 } 3196 } 3197 3198 static int 3199 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 3200 struct nvme_async_event_request *aer) 3201 { 3202 struct nvme_request *req; 3203 3204 aer->ctrlr = ctrlr; 3205 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 3206 aer->req = req; 3207 if (req == NULL) { 3208 return -1; 3209 } 3210 3211 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 3212 return nvme_ctrlr_submit_admin_request(ctrlr, req); 3213 } 3214 3215 static void 3216 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 3217 { 3218 struct nvme_async_event_request *aer; 3219 int rc; 3220 uint32_t i; 3221 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 3222 3223 if (spdk_nvme_cpl_is_error(cpl)) { 3224 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 3225 ctrlr->num_aers = 0; 3226 } else { 3227 /* aerl is a zero-based value, so we need to add 1 here. 
*/ 3228 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 3229 } 3230 3231 for (i = 0; i < ctrlr->num_aers; i++) { 3232 aer = &ctrlr->aer[i]; 3233 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 3234 if (rc) { 3235 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 3236 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3237 return; 3238 } 3239 } 3240 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms); 3241 } 3242 3243 static int 3244 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 3245 { 3246 union spdk_nvme_feat_async_event_configuration config; 3247 int rc; 3248 3249 config.raw = 0; 3250 3251 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3252 config.bits.discovery_log_change_notice = 1; 3253 } else { 3254 config.bits.crit_warn.bits.available_spare = 1; 3255 config.bits.crit_warn.bits.temperature = 1; 3256 config.bits.crit_warn.bits.device_reliability = 1; 3257 config.bits.crit_warn.bits.read_only = 1; 3258 config.bits.crit_warn.bits.volatile_memory_backup = 1; 3259 3260 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 3261 if (ctrlr->cdata.oaes.ns_attribute_notices) { 3262 config.bits.ns_attr_notice = 1; 3263 } 3264 if (ctrlr->cdata.oaes.fw_activation_notices) { 3265 config.bits.fw_activation_notice = 1; 3266 } 3267 if (ctrlr->cdata.oaes.ana_change_notices) { 3268 config.bits.ana_change_notice = 1; 3269 } 3270 } 3271 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 3272 config.bits.telemetry_log_notice = 1; 3273 } 3274 } 3275 3276 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 3277 ctrlr->opts.admin_timeout_ms); 3278 3279 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 3280 nvme_ctrlr_configure_aer_done, 3281 ctrlr); 3282 if (rc != 0) { 3283 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3284 return rc; 3285 } 3286 3287 return 0; 3288 } 3289 3290 struct spdk_nvme_ctrlr_process * 3291 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 3292 { 3293 struct spdk_nvme_ctrlr_process *active_proc; 3294 3295 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3296 if (active_proc->pid == pid) { 3297 return active_proc; 3298 } 3299 } 3300 3301 return NULL; 3302 } 3303 3304 struct spdk_nvme_ctrlr_process * 3305 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 3306 { 3307 return nvme_ctrlr_get_process(ctrlr, getpid()); 3308 } 3309 3310 /** 3311 * This function will be called when a process is using the controller. 3312 * 1. For the primary process, it is called when constructing the controller. 3313 * 2. For the secondary process, it is called at probing the controller. 3314 * Note: will check whether the process is already added for the same process. 
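* The per-process object itself is allocated from shared memory (spdk_zmalloc()
* with SPDK_MALLOC_SHARE below) so that sibling processes can access it.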
3315 */ 3316 int 3317 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 3318 { 3319 struct spdk_nvme_ctrlr_process *ctrlr_proc; 3320 pid_t pid = getpid(); 3321 3322 /* Check whether the process is already added or not */ 3323 if (nvme_ctrlr_get_process(ctrlr, pid)) { 3324 return 0; 3325 } 3326 3327 /* Initialize the per process properties for this ctrlr */ 3328 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 3329 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 3330 if (ctrlr_proc == NULL) { 3331 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 3332 3333 return -1; 3334 } 3335 3336 ctrlr_proc->is_primary = spdk_process_is_primary(); 3337 ctrlr_proc->pid = pid; 3338 STAILQ_INIT(&ctrlr_proc->active_reqs); 3339 ctrlr_proc->devhandle = devhandle; 3340 ctrlr_proc->ref = 0; 3341 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 3342 STAILQ_INIT(&ctrlr_proc->async_events); 3343 3344 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 3345 3346 return 0; 3347 } 3348 3349 /** 3350 * This function will be called when the process detaches the controller. 3351 * Note: the ctrlr_lock must be held when calling this function. 3352 */ 3353 static void 3354 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 3355 struct spdk_nvme_ctrlr_process *proc) 3356 { 3357 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3358 3359 assert(STAILQ_EMPTY(&proc->active_reqs)); 3360 3361 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3362 spdk_nvme_ctrlr_free_io_qpair(qpair); 3363 } 3364 3365 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 3366 3367 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3368 spdk_pci_device_detach(proc->devhandle); 3369 } 3370 3371 spdk_free(proc); 3372 } 3373 3374 /** 3375 * This function will be called when the process exited unexpectedly 3376 * in order to free any incomplete nvme request, allocated IO qpairs 3377 * and allocated memory. 3378 * Note: the ctrlr_lock must be held when calling this function. 3379 */ 3380 static void 3381 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 3382 { 3383 struct nvme_request *req, *tmp_req; 3384 struct spdk_nvme_qpair *qpair, *tmp_qpair; 3385 struct spdk_nvme_ctrlr_aer_completion_list *event; 3386 3387 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3388 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3389 3390 assert(req->pid == proc->pid); 3391 3392 nvme_free_request(req); 3393 } 3394 3395 /* Remove async event from each process objects event list */ 3396 while (!STAILQ_EMPTY(&proc->async_events)) { 3397 event = STAILQ_FIRST(&proc->async_events); 3398 STAILQ_REMOVE_HEAD(&proc->async_events, link); 3399 spdk_free(event); 3400 } 3401 3402 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 3403 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3404 3405 /* 3406 * The process may have been killed while some qpairs were in their 3407 * completion context. Clear that flag here to allow these IO 3408 * qpairs to be deleted. 3409 */ 3410 qpair->in_completion_context = 0; 3411 3412 qpair->no_deletion_notification_needed = 1; 3413 3414 spdk_nvme_ctrlr_free_io_qpair(qpair); 3415 } 3416 3417 spdk_free(proc); 3418 } 3419 3420 /** 3421 * This function will be called when destructing the controller. 3422 * 1. There is no more admin request on this controller. 3423 * 2. Clean up any left resource allocation when its associated process is gone. 
3424 */
3425 void
3426 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
3427 {
3428 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
3429 
3430 /* Free all the processes' properties and make sure there are no pending admin IOs */
3431 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3432 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3433 
3434 assert(STAILQ_EMPTY(&active_proc->active_reqs));
3435 
3436 spdk_free(active_proc);
3437 }
3438 }
3439 
3440 /**
3441 * This function will be called when any other process attaches or
3442 * detaches the controller in order to clean up those unexpectedly
3443 * terminated processes.
3444 * Note: the ctrlr_lock must be held when calling this function.
3445 */
3446 static int
3447 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
3448 {
3449 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
3450 int active_proc_count = 0;
3451 
3452 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3453 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
3454 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpectedly\n", active_proc->pid);
3455 
3456 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3457 
3458 nvme_ctrlr_cleanup_process(active_proc);
3459 } else {
3460 active_proc_count++;
3461 }
3462 }
3463 
3464 return active_proc_count;
3465 }
3466 
3467 void
3468 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
3469 {
3470 struct spdk_nvme_ctrlr_process *active_proc;
3471 
3472 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3473 
3474 nvme_ctrlr_remove_inactive_proc(ctrlr);
3475 
3476 active_proc = nvme_ctrlr_get_current_process(ctrlr);
3477 if (active_proc) {
3478 active_proc->ref++;
3479 }
3480 
3481 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3482 }
3483 
3484 void
3485 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
3486 {
3487 struct spdk_nvme_ctrlr_process *active_proc;
3488 int proc_count;
3489 
3490 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3491 
3492 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
3493 
3494 active_proc = nvme_ctrlr_get_current_process(ctrlr);
3495 if (active_proc) {
3496 active_proc->ref--;
3497 assert(active_proc->ref >= 0);
3498 
3499 /*
3500 * The last active process will be removed at the end of
3501 * the destruction of the controller.
3502 */
3503 if (active_proc->ref == 0 && proc_count != 1) {
3504 nvme_ctrlr_remove_process(ctrlr, active_proc);
3505 }
3506 }
3507 
3508 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3509 }
3510 
3511 int
3512 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
3513 {
3514 struct spdk_nvme_ctrlr_process *active_proc;
3515 int ref = 0;
3516 
3517 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3518 
3519 nvme_ctrlr_remove_inactive_proc(ctrlr);
3520 
3521 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3522 ref += active_proc->ref;
3523 }
3524 
3525 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3526 
3527 return ref;
3528 }
3529 
3530 /**
3531 * Get the PCI device handle which is only visible to its associated process.
3532 */ 3533 struct spdk_pci_device * 3534 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3535 { 3536 struct spdk_nvme_ctrlr_process *active_proc; 3537 struct spdk_pci_device *devhandle = NULL; 3538 3539 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3540 3541 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3542 if (active_proc) { 3543 devhandle = active_proc->devhandle; 3544 } 3545 3546 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3547 3548 return devhandle; 3549 } 3550 3551 static void 3552 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3553 { 3554 struct spdk_nvme_ctrlr *ctrlr = ctx; 3555 3556 if (spdk_nvme_cpl_is_error(cpl)) { 3557 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n"); 3558 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3559 return; 3560 } 3561 3562 assert(value <= UINT32_MAX); 3563 ctrlr->vs.raw = (uint32_t)value; 3564 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3565 } 3566 3567 static void 3568 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3569 { 3570 struct spdk_nvme_ctrlr *ctrlr = ctx; 3571 3572 if (spdk_nvme_cpl_is_error(cpl)) { 3573 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n"); 3574 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3575 return; 3576 } 3577 3578 ctrlr->cap.raw = value; 3579 nvme_ctrlr_init_cap(ctrlr); 3580 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3581 } 3582 3583 static void 3584 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3585 { 3586 struct spdk_nvme_ctrlr *ctrlr = ctx; 3587 enum nvme_ctrlr_state state; 3588 3589 if (spdk_nvme_cpl_is_error(cpl)) { 3590 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3591 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3592 return; 3593 } 3594 3595 assert(value <= UINT32_MAX); 3596 ctrlr->process_init_cc.raw = (uint32_t)value; 3597 3598 if (ctrlr->process_init_cc.bits.en) { 3599 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3600 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1; 3601 } else { 3602 state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0; 3603 } 3604 3605 nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr)); 3606 } 3607 3608 static void 3609 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3610 { 3611 struct spdk_nvme_ctrlr *ctrlr = ctx; 3612 3613 if (spdk_nvme_cpl_is_error(cpl)) { 3614 NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n"); 3615 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3616 return; 3617 } 3618 3619 /* 3620 * Wait 2.5 seconds before accessing PCI registers. 3621 * Not using sleep() to avoid blocking other controller's initialization. 
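* (The expression 2500 * spdk_get_ticks_hz() / 1000 below is simply 2.5 seconds
* expressed in timestamp-counter ticks; nvme_ctrlr_process_init() returns early
* until that deadline has passed.)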
3622 */ 3623 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3624 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3625 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); 3626 } 3627 3628 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3629 nvme_ctrlr_get_ready_timeout(ctrlr)); 3630 } 3631 3632 static void 3633 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3634 { 3635 struct spdk_nvme_ctrlr *ctrlr = ctx; 3636 union spdk_nvme_cc_register cc; 3637 int rc; 3638 3639 if (spdk_nvme_cpl_is_error(cpl)) { 3640 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n"); 3641 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3642 return; 3643 } 3644 3645 assert(value <= UINT32_MAX); 3646 cc.raw = (uint32_t)value; 3647 cc.bits.en = 0; 3648 ctrlr->process_init_cc.raw = cc.raw; 3649 3650 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, 3651 nvme_ctrlr_get_ready_timeout(ctrlr)); 3652 3653 rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr); 3654 if (rc != 0) { 3655 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3656 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3657 } 3658 } 3659 3660 static void 3661 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3662 { 3663 struct spdk_nvme_ctrlr *ctrlr = ctx; 3664 union spdk_nvme_csts_register csts; 3665 3666 if (spdk_nvme_cpl_is_error(cpl)) { 3667 /* While a device is resetting, it may be unable to service MMIO reads 3668 * temporarily. Allow for this case. 3669 */ 3670 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3671 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3672 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3673 NVME_TIMEOUT_KEEP_EXISTING); 3674 } else { 3675 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3676 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3677 } 3678 3679 return; 3680 } 3681 3682 assert(value <= UINT32_MAX); 3683 csts.raw = (uint32_t)value; 3684 if (csts.bits.rdy == 1) { 3685 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0, 3686 nvme_ctrlr_get_ready_timeout(ctrlr)); 3687 } else { 3688 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3689 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, 3690 NVME_TIMEOUT_KEEP_EXISTING); 3691 } 3692 } 3693 3694 static void 3695 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl) 3696 { 3697 struct spdk_nvme_ctrlr *ctrlr = ctx; 3698 union spdk_nvme_csts_register csts; 3699 3700 if (spdk_nvme_cpl_is_error(cpl)) { 3701 /* While a device is resetting, it may be unable to service MMIO reads 3702 * temporarily. Allow for this case. 
3703 */ 3704 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3705 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3706 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3707 NVME_TIMEOUT_KEEP_EXISTING); 3708 } else { 3709 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3710 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3711 } 3712 3713 return; 3714 } 3715 3716 assert(value <= UINT32_MAX); 3717 csts.raw = (uint32_t)value; 3718 if (csts.bits.rdy == 0) { 3719 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3720 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, 3721 nvme_ctrlr_get_ready_timeout(ctrlr)); 3722 /* 3723 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3724 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3725 */ 3726 spdk_delay_us(100); 3727 } else { 3728 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, 3729 NVME_TIMEOUT_KEEP_EXISTING); 3730 } 3731 } 3732 3733 static void 3734 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value, 3735 const struct spdk_nvme_cpl *cpl) 3736 { 3737 struct spdk_nvme_ctrlr *ctrlr = ctx; 3738 union spdk_nvme_csts_register csts; 3739 3740 if (spdk_nvme_cpl_is_error(cpl)) { 3741 /* While a device is resetting, it may be unable to service MMIO reads 3742 * temporarily. Allow for this case. 3743 */ 3744 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3745 NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n"); 3746 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3747 NVME_TIMEOUT_KEEP_EXISTING); 3748 } else { 3749 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n"); 3750 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3751 } 3752 3753 return; 3754 } 3755 3756 assert(value <= UINT32_MAX); 3757 csts.raw = value; 3758 if (csts.bits.rdy == 1) { 3759 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3760 /* 3761 * The controller has been enabled. 3762 * Perform the rest of initialization serially. 3763 */ 3764 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3765 ctrlr->opts.admin_timeout_ms); 3766 } else { 3767 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, 3768 NVME_TIMEOUT_KEEP_EXISTING); 3769 } 3770 } 3771 3772 /** 3773 * This function will be called repeatedly during initialization until the controller is ready. 3774 */ 3775 int 3776 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3777 { 3778 uint32_t ready_timeout_in_ms; 3779 uint64_t ticks; 3780 int rc = 0; 3781 3782 ticks = spdk_get_ticks(); 3783 3784 /* 3785 * May need to avoid accessing any register on the target controller 3786 * for a while. Return early without touching the FSM. 3787 * Check sleep_timeout_tsc > 0 for unit test. 3788 */ 3789 if ((ctrlr->sleep_timeout_tsc > 0) && 3790 (ticks <= ctrlr->sleep_timeout_tsc)) { 3791 return 0; 3792 } 3793 ctrlr->sleep_timeout_tsc = 0; 3794 3795 ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr); 3796 3797 /* 3798 * Check if the current initialization step is done or has timed out. 3799 */ 3800 switch (ctrlr->state) { 3801 case NVME_CTRLR_STATE_INIT_DELAY: 3802 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3803 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3804 /* 3805 * Controller may need some delay before it's enabled. 
3806 * 3807 * This is a workaround for an issue where the PCIe-attached NVMe controller 3808 * is not ready after VFIO reset. We delay the initialization rather than the 3809 * enabling itself, because this is required only for the very first enabling 3810 * - directly after a VFIO reset. 3811 */ 3812 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3813 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3814 } 3815 break; 3816 3817 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3818 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3819 if (rc == 0) { 3820 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ, 3821 NVME_TIMEOUT_INFINITE); 3822 } else { 3823 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3824 } 3825 break; 3826 3827 case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ: 3828 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3829 3830 switch (nvme_qpair_get_state(ctrlr->adminq)) { 3831 case NVME_QPAIR_CONNECTING: 3832 break; 3833 case NVME_QPAIR_CONNECTED: 3834 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3835 /* Fall through */ 3836 case NVME_QPAIR_ENABLED: 3837 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, 3838 NVME_TIMEOUT_INFINITE); 3839 /* Abort any queued requests that were sent while the adminq was connecting 3840 * to avoid stalling the init process during a reset, as requests don't get 3841 * resubmitted while the controller is resetting and subsequent commands 3842 * would get queued too. 3843 */ 3844 nvme_qpair_abort_queued_reqs(ctrlr->adminq, 0); 3845 break; 3846 default: 3847 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3848 break; 3849 } 3850 3851 break; 3852 3853 case NVME_CTRLR_STATE_READ_VS: 3854 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE); 3855 rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr); 3856 break; 3857 3858 case NVME_CTRLR_STATE_READ_CAP: 3859 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE); 3860 rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr); 3861 break; 3862 3863 case NVME_CTRLR_STATE_CHECK_EN: 3864 /* Begin the hardware initialization by making sure the controller is disabled. */ 3865 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms); 3866 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr); 3867 break; 3868 3869 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3870 /* 3871 * Controller is currently enabled. We need to disable it to cause a reset. 3872 * 3873 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3874 * Wait for the ready bit to be 1 before disabling the controller. 
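 *
 * For reference, the disable/enable sequence driven by this state machine is:
 * with CC.EN == 1, wait for CSTS.RDY = 1, write CC.EN = 0, wait for CSTS.RDY = 0,
 * then write CC.EN = 1 and wait for CSTS.RDY = 1 before resetting the admin queue.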
3875 */ 3876 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3877 NVME_TIMEOUT_KEEP_EXISTING); 3878 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr); 3879 break; 3880 3881 case NVME_CTRLR_STATE_SET_EN_0: 3882 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3883 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms); 3884 rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr); 3885 break; 3886 3887 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3888 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS, 3889 NVME_TIMEOUT_KEEP_EXISTING); 3890 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr); 3891 break; 3892 3893 case NVME_CTRLR_STATE_ENABLE: 3894 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3895 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms); 3896 rc = nvme_ctrlr_enable(ctrlr); 3897 return rc; 3898 3899 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3900 nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS, 3901 NVME_TIMEOUT_KEEP_EXISTING); 3902 rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1, 3903 ctrlr); 3904 break; 3905 3906 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3907 nvme_transport_qpair_reset(ctrlr->adminq); 3908 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); 3909 break; 3910 3911 case NVME_CTRLR_STATE_IDENTIFY: 3912 rc = nvme_ctrlr_identify(ctrlr); 3913 break; 3914 3915 case NVME_CTRLR_STATE_CONFIGURE_AER: 3916 rc = nvme_ctrlr_configure_aer(ctrlr); 3917 break; 3918 3919 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3920 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3921 break; 3922 3923 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3924 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3925 break; 3926 3927 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3928 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3929 break; 3930 3931 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3932 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3933 rc = nvme_ctrlr_set_num_queues(ctrlr); 3934 break; 3935 3936 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3937 _nvme_ctrlr_identify_active_ns(ctrlr); 3938 break; 3939 3940 case NVME_CTRLR_STATE_IDENTIFY_NS: 3941 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3942 break; 3943 3944 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3945 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3946 break; 3947 3948 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3949 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3950 break; 3951 3952 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3953 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3954 break; 3955 3956 case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES: 3957 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); 3958 break; 3959 3960 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3961 nvme_ctrlr_set_supported_features(ctrlr); 3962 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3963 ctrlr->opts.admin_timeout_ms); 3964 break; 3965 3966 case NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3967 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3968 break; 3969 3970 case NVME_CTRLR_STATE_SET_HOST_ID: 3971 rc = nvme_ctrlr_set_host_id(ctrlr); 3972 break; 3973 3974 case NVME_CTRLR_STATE_READY: 3975 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3976 return 0; 3977 
3978 case NVME_CTRLR_STATE_ERROR: 3979 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3980 return -1; 3981 3982 case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS: 3983 case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP: 3984 case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC: 3985 case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC: 3986 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3987 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS: 3988 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC: 3989 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS: 3990 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 3991 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 3992 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 3993 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 3994 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 3995 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 3996 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 3997 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 3998 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 3999 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 4000 case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES: 4001 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 4002 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 4003 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4004 break; 4005 4006 default: 4007 assert(0); 4008 return -1; 4009 } 4010 4011 /* Note: we use the ticks captured when we entered this function. 4012 * This covers environments where the SPDK process gets swapped out after 4013 * we tried to advance the state but before we check the timeout here. 4014 * It is not normal for this to happen, but harmless to handle it in this 4015 * way. 4016 */ 4017 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 4018 ticks > ctrlr->state_timeout_tsc) { 4019 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n", 4020 ctrlr->state, nvme_ctrlr_state_string(ctrlr->state)); 4021 return -1; 4022 } 4023 4024 return rc; 4025 } 4026 4027 int 4028 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 4029 { 4030 pthread_mutexattr_t attr; 4031 int rc = 0; 4032 4033 if (pthread_mutexattr_init(&attr)) { 4034 return -1; 4035 } 4036 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 4037 #ifndef __FreeBSD__ 4038 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 4039 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 4040 #endif 4041 pthread_mutex_init(mtx, &attr)) { 4042 rc = -1; 4043 } 4044 pthread_mutexattr_destroy(&attr); 4045 return rc; 4046 } 4047 4048 int 4049 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 4050 { 4051 int rc; 4052 4053 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 4054 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 4055 } else { 4056 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 4057 } 4058 4059 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 4060 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 4061 ctrlr->opts.admin_queue_size); 4062 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 4063 } 4064 4065 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 4066 NVME_CTRLR_ERRLOG(ctrlr, 4067 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 4068 ctrlr->opts.admin_queue_size); 4069 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 4070 } 4071 4072 ctrlr->flags 
= 0; 4073 ctrlr->free_io_qids = NULL; 4074 ctrlr->is_resetting = false; 4075 ctrlr->is_failed = false; 4076 ctrlr->is_destructed = false; 4077 4078 TAILQ_INIT(&ctrlr->active_io_qpairs); 4079 STAILQ_INIT(&ctrlr->queued_aborts); 4080 ctrlr->outstanding_aborts = 0; 4081 4082 ctrlr->ana_log_page = NULL; 4083 ctrlr->ana_log_page_size = 0; 4084 4085 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 4086 if (rc != 0) { 4087 return rc; 4088 } 4089 4090 TAILQ_INIT(&ctrlr->active_procs); 4091 STAILQ_INIT(&ctrlr->register_operations); 4092 4093 RB_INIT(&ctrlr->ns); 4094 4095 return rc; 4096 } 4097 4098 static void 4099 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 4100 { 4101 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 4102 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 4103 } 4104 4105 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 4106 4107 /* For now, always select page_size == min_page_size. */ 4108 ctrlr->page_size = ctrlr->min_page_size; 4109 4110 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 4111 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 4112 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 4113 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 4114 /* If the user specifically set an IO queue size different than the 4115 * default, use that value. Otherwise overwrite with the quirked value. 4116 * This allows this quirk to be overridden when necessary. 4117 * However, cap.mqes still needs to be respected. 4118 */ 4119 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 4120 } 4121 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 4122 4123 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 4124 } 4125 4126 void 4127 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 4128 { 4129 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 4130 } 4131 4132 void 4133 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 4134 struct nvme_ctrlr_detach_ctx *ctx) 4135 { 4136 struct spdk_nvme_qpair *qpair, *tmp; 4137 4138 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 4139 4140 ctrlr->is_destructed = true; 4141 4142 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4143 4144 nvme_ctrlr_abort_queued_aborts(ctrlr); 4145 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 4146 4147 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 4148 spdk_nvme_ctrlr_free_io_qpair(qpair); 4149 } 4150 4151 nvme_ctrlr_free_doorbell_buffer(ctrlr); 4152 nvme_ctrlr_free_iocs_specific_data(ctrlr); 4153 4154 nvme_ctrlr_shutdown_async(ctrlr, ctx); 4155 } 4156 4157 int 4158 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 4159 struct nvme_ctrlr_detach_ctx *ctx) 4160 { 4161 struct spdk_nvme_ns *ns, *tmp_ns; 4162 int rc = 0; 4163 4164 if (!ctx->shutdown_complete) { 4165 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 4166 if (rc == -EAGAIN) { 4167 return -EAGAIN; 4168 } 4169 /* Destruct ctrlr forcefully for any other error. 
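	 * Fall through and release resources below even though the shutdown did not
	 * complete cleanly.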
*/ 4170 } 4171 4172 if (ctx->cb_fn) { 4173 ctx->cb_fn(ctrlr); 4174 } 4175 4176 nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq); 4177 4178 RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { 4179 nvme_ctrlr_destruct_namespace(ctrlr, ns->id); 4180 RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); 4181 spdk_free(ns); 4182 } 4183 4184 ctrlr->active_ns_count = 0; 4185 4186 spdk_bit_array_free(&ctrlr->free_io_qids); 4187 4188 free(ctrlr->ana_log_page); 4189 free(ctrlr->copied_ana_desc); 4190 ctrlr->ana_log_page = NULL; 4191 ctrlr->copied_ana_desc = NULL; 4192 ctrlr->ana_log_page_size = 0; 4193 4194 nvme_transport_ctrlr_destruct(ctrlr); 4195 4196 return rc; 4197 } 4198 4199 void 4200 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 4201 { 4202 struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr }; 4203 int rc; 4204 4205 nvme_ctrlr_destruct_async(ctrlr, &ctx); 4206 4207 while (1) { 4208 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 4209 if (rc != -EAGAIN) { 4210 break; 4211 } 4212 nvme_delay(1000); 4213 } 4214 } 4215 4216 int 4217 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 4218 struct nvme_request *req) 4219 { 4220 return nvme_qpair_submit_request(ctrlr->adminq, req); 4221 } 4222 4223 static void 4224 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 4225 { 4226 /* Do nothing */ 4227 } 4228 4229 /* 4230 * Check if we need to send a Keep Alive command. 4231 * Caller must hold ctrlr->ctrlr_lock. 4232 */ 4233 static int 4234 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 4235 { 4236 uint64_t now; 4237 struct nvme_request *req; 4238 struct spdk_nvme_cmd *cmd; 4239 int rc = 0; 4240 4241 now = spdk_get_ticks(); 4242 if (now < ctrlr->next_keep_alive_tick) { 4243 return rc; 4244 } 4245 4246 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 4247 if (req == NULL) { 4248 return rc; 4249 } 4250 4251 cmd = &req->cmd; 4252 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 4253 4254 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 4255 if (rc != 0) { 4256 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 4257 rc = -ENXIO; 4258 } 4259 4260 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 4261 return rc; 4262 } 4263 4264 int32_t 4265 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 4266 { 4267 int32_t num_completions; 4268 int32_t rc; 4269 struct spdk_nvme_ctrlr_process *active_proc; 4270 4271 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4272 4273 if (ctrlr->keep_alive_interval_ticks) { 4274 rc = nvme_ctrlr_keep_alive(ctrlr); 4275 if (rc) { 4276 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4277 return rc; 4278 } 4279 } 4280 4281 rc = nvme_io_msg_process(ctrlr); 4282 if (rc < 0) { 4283 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4284 return rc; 4285 } 4286 num_completions = rc; 4287 4288 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 4289 4290 /* Each process has an async list, complete the ones for this process object */ 4291 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4292 if (active_proc) { 4293 nvme_ctrlr_complete_queued_async_events(ctrlr); 4294 } 4295 4296 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4297 4298 if (rc < 0) { 4299 num_completions = rc; 4300 } else { 4301 num_completions += rc; 4302 } 4303 4304 return num_completions; 4305 } 4306 4307 const struct spdk_nvme_ctrlr_data * 4308 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 4309 { 4310 return &ctrlr->cdata; 4311 } 4312 4313 union spdk_nvme_csts_register 
spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 4314 { 4315 union spdk_nvme_csts_register csts; 4316 4317 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 4318 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4319 } 4320 return csts; 4321 } 4322 4323 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr) 4324 { 4325 union spdk_nvme_cc_register cc; 4326 4327 if (nvme_ctrlr_get_cc(ctrlr, &cc)) { 4328 cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 4329 } 4330 return cc; 4331 } 4332 4333 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 4334 { 4335 return ctrlr->cap; 4336 } 4337 4338 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) 4339 { 4340 return ctrlr->vs; 4341 } 4342 4343 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr) 4344 { 4345 union spdk_nvme_cmbsz_register cmbsz; 4346 4347 if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) { 4348 cmbsz.raw = 0; 4349 } 4350 4351 return cmbsz; 4352 } 4353 4354 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr) 4355 { 4356 union spdk_nvme_pmrcap_register pmrcap; 4357 4358 if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) { 4359 pmrcap.raw = 0; 4360 } 4361 4362 return pmrcap; 4363 } 4364 4365 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr) 4366 { 4367 union spdk_nvme_bpinfo_register bpinfo; 4368 4369 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4370 bpinfo.raw = 0; 4371 } 4372 4373 return bpinfo; 4374 } 4375 4376 uint64_t 4377 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr) 4378 { 4379 return ctrlr->pmr_size; 4380 } 4381 4382 uint32_t 4383 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) 4384 { 4385 return ctrlr->cdata.nn; 4386 } 4387 4388 bool 4389 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4390 { 4391 struct spdk_nvme_ns tmp, *ns; 4392 4393 tmp.id = nsid; 4394 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4395 4396 if (ns != NULL) { 4397 return ns->active; 4398 } 4399 4400 return false; 4401 } 4402 4403 uint32_t 4404 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) 4405 { 4406 struct spdk_nvme_ns *ns; 4407 4408 ns = RB_MIN(nvme_ns_tree, &ctrlr->ns); 4409 if (ns == NULL) { 4410 return 0; 4411 } 4412 4413 while (ns != NULL) { 4414 if (ns->active) { 4415 return ns->id; 4416 } 4417 4418 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4419 } 4420 4421 return 0; 4422 } 4423 4424 uint32_t 4425 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 4426 { 4427 struct spdk_nvme_ns tmp, *ns; 4428 4429 tmp.id = prev_nsid; 4430 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4431 if (ns == NULL) { 4432 return 0; 4433 } 4434 4435 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4436 while (ns != NULL) { 4437 if (ns->active) { 4438 return ns->id; 4439 } 4440 4441 ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns); 4442 } 4443 4444 return 0; 4445 } 4446 4447 struct spdk_nvme_ns * 4448 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4449 { 4450 struct spdk_nvme_ns tmp; 4451 struct spdk_nvme_ns *ns; 4452 4453 if (nsid < 1 || nsid > ctrlr->cdata.nn) { 4454 return NULL; 4455 } 4456 4457 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4458 4459 tmp.id = nsid; 4460 ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp); 4461 4462 if (ns == NULL) { 4463 ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 4464 if (ns == 
NULL) { 4465 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4466 return NULL; 4467 } 4468 4469 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 4470 ns->id = nsid; 4471 RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns); 4472 } 4473 4474 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4475 4476 return ns; 4477 } 4478 4479 struct spdk_pci_device * 4480 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) 4481 { 4482 if (ctrlr == NULL) { 4483 return NULL; 4484 } 4485 4486 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 4487 return NULL; 4488 } 4489 4490 return nvme_ctrlr_proc_get_devhandle(ctrlr); 4491 } 4492 4493 uint32_t 4494 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) 4495 { 4496 return ctrlr->max_xfer_size; 4497 } 4498 4499 void 4500 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, 4501 spdk_nvme_aer_cb aer_cb_fn, 4502 void *aer_cb_arg) 4503 { 4504 struct spdk_nvme_ctrlr_process *active_proc; 4505 4506 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4507 4508 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4509 if (active_proc) { 4510 active_proc->aer_cb_fn = aer_cb_fn; 4511 active_proc->aer_cb_arg = aer_cb_arg; 4512 } 4513 4514 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4515 } 4516 4517 void 4518 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, 4519 uint64_t timeout_io_us, uint64_t timeout_admin_us, 4520 spdk_nvme_timeout_cb cb_fn, void *cb_arg) 4521 { 4522 struct spdk_nvme_ctrlr_process *active_proc; 4523 4524 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4525 4526 active_proc = nvme_ctrlr_get_current_process(ctrlr); 4527 if (active_proc) { 4528 active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL; 4529 active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL; 4530 active_proc->timeout_cb_fn = cb_fn; 4531 active_proc->timeout_cb_arg = cb_arg; 4532 } 4533 4534 ctrlr->timeout_enabled = true; 4535 4536 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4537 } 4538 4539 bool 4540 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) 4541 { 4542 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ 4543 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); 4544 return ctrlr->log_page_supported[log_page]; 4545 } 4546 4547 bool 4548 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) 4549 { 4550 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ 4551 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); 4552 return ctrlr->feature_supported[feature_code]; 4553 } 4554 4555 int 4556 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4557 struct spdk_nvme_ctrlr_list *payload) 4558 { 4559 struct nvme_completion_poll_status *status; 4560 struct spdk_nvme_ns *ns; 4561 int res; 4562 4563 if (nsid == 0) { 4564 return -EINVAL; 4565 } 4566 4567 status = calloc(1, sizeof(*status)); 4568 if (!status) { 4569 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4570 return -ENOMEM; 4571 } 4572 4573 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, 4574 nvme_completion_poll_cb, status); 4575 if (res) { 4576 free(status); 4577 return res; 4578 } 4579 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4580 NVME_CTRLR_ERRLOG(ctrlr, 
"spdk_nvme_ctrlr_attach_ns failed!\n"); 4581 if (!status->timed_out) { 4582 free(status); 4583 } 4584 return -ENXIO; 4585 } 4586 free(status); 4587 4588 res = nvme_ctrlr_identify_active_ns(ctrlr); 4589 if (res) { 4590 return res; 4591 } 4592 4593 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 4594 return nvme_ns_construct(ns, nsid, ctrlr); 4595 } 4596 4597 int 4598 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4599 struct spdk_nvme_ctrlr_list *payload) 4600 { 4601 struct nvme_completion_poll_status *status; 4602 int res; 4603 4604 if (nsid == 0) { 4605 return -EINVAL; 4606 } 4607 4608 status = calloc(1, sizeof(*status)); 4609 if (!status) { 4610 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4611 return -ENOMEM; 4612 } 4613 4614 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, 4615 nvme_completion_poll_cb, status); 4616 if (res) { 4617 free(status); 4618 return res; 4619 } 4620 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4621 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n"); 4622 if (!status->timed_out) { 4623 free(status); 4624 } 4625 return -ENXIO; 4626 } 4627 free(status); 4628 4629 return nvme_ctrlr_identify_active_ns(ctrlr); 4630 } 4631 4632 uint32_t 4633 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) 4634 { 4635 struct nvme_completion_poll_status *status; 4636 int res; 4637 uint32_t nsid; 4638 4639 status = calloc(1, sizeof(*status)); 4640 if (!status) { 4641 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4642 return 0; 4643 } 4644 4645 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status); 4646 if (res) { 4647 free(status); 4648 return 0; 4649 } 4650 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4651 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n"); 4652 if (!status->timed_out) { 4653 free(status); 4654 } 4655 return 0; 4656 } 4657 4658 nsid = status->cpl.cdw0; 4659 free(status); 4660 4661 assert(nsid > 0); 4662 4663 /* Return the namespace ID that was created */ 4664 return nsid; 4665 } 4666 4667 int 4668 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 4669 { 4670 struct nvme_completion_poll_status *status; 4671 int res; 4672 4673 if (nsid == 0) { 4674 return -EINVAL; 4675 } 4676 4677 status = calloc(1, sizeof(*status)); 4678 if (!status) { 4679 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4680 return -ENOMEM; 4681 } 4682 4683 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status); 4684 if (res) { 4685 free(status); 4686 return res; 4687 } 4688 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4689 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n"); 4690 if (!status->timed_out) { 4691 free(status); 4692 } 4693 return -ENXIO; 4694 } 4695 free(status); 4696 4697 return nvme_ctrlr_identify_active_ns(ctrlr); 4698 } 4699 4700 int 4701 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, 4702 struct spdk_nvme_format *format) 4703 { 4704 struct nvme_completion_poll_status *status; 4705 int res; 4706 4707 status = calloc(1, sizeof(*status)); 4708 if (!status) { 4709 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4710 return -ENOMEM; 4711 } 4712 4713 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, 4714 status); 4715 if (res) { 4716 free(status); 4717 return res; 4718 } 4719 if 
(nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4720 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n"); 4721 if (!status->timed_out) { 4722 free(status); 4723 } 4724 return -ENXIO; 4725 } 4726 free(status); 4727 4728 return spdk_nvme_ctrlr_reset(ctrlr); 4729 } 4730 4731 int 4732 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, 4733 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) 4734 { 4735 struct spdk_nvme_fw_commit fw_commit; 4736 struct nvme_completion_poll_status *status; 4737 int res; 4738 unsigned int size_remaining; 4739 unsigned int offset; 4740 unsigned int transfer; 4741 void *p; 4742 4743 if (!completion_status) { 4744 return -EINVAL; 4745 } 4746 memset(completion_status, 0, sizeof(struct spdk_nvme_status)); 4747 if (size % 4) { 4748 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n"); 4749 return -1; 4750 } 4751 4752 /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG 4753 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG 4754 */ 4755 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && 4756 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { 4757 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n"); 4758 return -1; 4759 } 4760 4761 status = calloc(1, sizeof(*status)); 4762 if (!status) { 4763 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 4764 return -ENOMEM; 4765 } 4766 4767 /* Firmware download */ 4768 size_remaining = size; 4769 offset = 0; 4770 p = payload; 4771 4772 while (size_remaining > 0) { 4773 transfer = spdk_min(size_remaining, ctrlr->min_page_size); 4774 4775 memset(status, 0, sizeof(*status)); 4776 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, 4777 nvme_completion_poll_cb, 4778 status); 4779 if (res) { 4780 free(status); 4781 return res; 4782 } 4783 4784 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 4785 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n"); 4786 if (!status->timed_out) { 4787 free(status); 4788 } 4789 return -ENXIO; 4790 } 4791 p += transfer; 4792 offset += transfer; 4793 size_remaining -= transfer; 4794 } 4795 4796 /* Firmware commit */ 4797 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 4798 fw_commit.fs = slot; 4799 fw_commit.ca = commit_action; 4800 4801 memset(status, 0, sizeof(*status)); 4802 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, 4803 status); 4804 if (res) { 4805 free(status); 4806 return res; 4807 } 4808 4809 res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock); 4810 4811 memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status)); 4812 4813 if (!status->timed_out) { 4814 free(status); 4815 } 4816 4817 if (res) { 4818 if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || 4819 completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { 4820 if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 4821 completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { 4822 NVME_CTRLR_NOTICELOG(ctrlr, 4823 "firmware activation requires conventional reset to be performed. 
!\n"); 4824 } else { 4825 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 4826 } 4827 return -ENXIO; 4828 } 4829 } 4830 4831 return spdk_nvme_ctrlr_reset(ctrlr); 4832 } 4833 4834 int 4835 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 4836 { 4837 int rc, size; 4838 union spdk_nvme_cmbsz_register cmbsz; 4839 4840 cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); 4841 4842 if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) { 4843 return -ENOTSUP; 4844 } 4845 4846 size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4)); 4847 4848 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4849 rc = nvme_transport_ctrlr_reserve_cmb(ctrlr); 4850 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4851 4852 if (rc < 0) { 4853 return rc; 4854 } 4855 4856 return size; 4857 } 4858 4859 void * 4860 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4861 { 4862 void *buf; 4863 4864 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4865 buf = nvme_transport_ctrlr_map_cmb(ctrlr, size); 4866 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4867 4868 return buf; 4869 } 4870 4871 void 4872 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr) 4873 { 4874 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4875 nvme_transport_ctrlr_unmap_cmb(ctrlr); 4876 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4877 } 4878 4879 int 4880 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4881 { 4882 int rc; 4883 4884 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4885 rc = nvme_transport_ctrlr_enable_pmr(ctrlr); 4886 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4887 4888 return rc; 4889 } 4890 4891 int 4892 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 4893 { 4894 int rc; 4895 4896 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4897 rc = nvme_transport_ctrlr_disable_pmr(ctrlr); 4898 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4899 4900 return rc; 4901 } 4902 4903 void * 4904 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 4905 { 4906 void *buf; 4907 4908 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4909 buf = nvme_transport_ctrlr_map_pmr(ctrlr, size); 4910 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4911 4912 return buf; 4913 } 4914 4915 int 4916 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr) 4917 { 4918 int rc; 4919 4920 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4921 rc = nvme_transport_ctrlr_unmap_pmr(ctrlr); 4922 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4923 4924 return rc; 4925 } 4926 4927 int spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload, 4928 uint32_t bprsz, uint32_t bprof, uint32_t bpid) 4929 { 4930 union spdk_nvme_bprsel_register bprsel; 4931 union spdk_nvme_bpinfo_register bpinfo; 4932 uint64_t bpmbl, bpmb_size; 4933 4934 if (ctrlr->cap.bits.bps == 0) { 4935 return -ENOTSUP; 4936 } 4937 4938 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4939 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4940 return -EIO; 4941 } 4942 4943 if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) { 4944 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n"); 4945 return -EALREADY; 4946 } 4947 4948 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 4949 4950 bpmb_size = bprsz * 4096; 4951 bpmbl = spdk_vtophys(payload, &bpmb_size); 4952 if (bpmbl == SPDK_VTOPHYS_ERROR) { 4953 NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n"); 4954 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4955 return -EFAULT; 4956 } 4957 4958 if (bpmb_size != bprsz * 4096) { 4959 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically 
contiguous\n"); 4960 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4961 return -EFAULT; 4962 } 4963 4964 if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) { 4965 NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n"); 4966 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4967 return -EIO; 4968 } 4969 4970 bprsel.bits.bpid = bpid; 4971 bprsel.bits.bprof = bprof; 4972 bprsel.bits.bprsz = bprsz; 4973 4974 if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) { 4975 NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n"); 4976 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4977 return -EIO; 4978 } 4979 4980 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 4981 return 0; 4982 } 4983 4984 int spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr) 4985 { 4986 int rc = 0; 4987 union spdk_nvme_bpinfo_register bpinfo; 4988 4989 if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) { 4990 NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n"); 4991 return -EIO; 4992 } 4993 4994 switch (bpinfo.bits.brs) { 4995 case SPDK_NVME_BRS_NO_READ: 4996 NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n"); 4997 rc = -EINVAL; 4998 break; 4999 case SPDK_NVME_BRS_READ_IN_PROGRESS: 5000 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n"); 5001 rc = -EAGAIN; 5002 break; 5003 case SPDK_NVME_BRS_READ_ERROR: 5004 NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n"); 5005 rc = -EIO; 5006 break; 5007 case SPDK_NVME_BRS_READ_SUCCESS: 5008 NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n"); 5009 break; 5010 default: 5011 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n"); 5012 rc = -EINVAL; 5013 } 5014 5015 return rc; 5016 } 5017 5018 static void 5019 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl) 5020 { 5021 int res; 5022 struct spdk_nvme_ctrlr *ctrlr = arg; 5023 struct spdk_nvme_fw_commit fw_commit; 5024 struct spdk_nvme_cpl err_cpl = 5025 {.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }}; 5026 5027 if (spdk_nvme_cpl_is_error(cpl)) { 5028 NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n"); 5029 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5030 return; 5031 } 5032 5033 if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) { 5034 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset); 5035 ctrlr->fw_payload += ctrlr->fw_transfer_size; 5036 ctrlr->fw_offset += ctrlr->fw_transfer_size; 5037 ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size; 5038 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5039 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5040 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5041 if (res) { 5042 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n"); 5043 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5044 return; 5045 } 5046 5047 if (ctrlr->fw_transfer_size < ctrlr->min_page_size) { 5048 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED; 5049 } 5050 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) { 5051 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n"); 5052 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5053 fw_commit.bpid = ctrlr->bpid; 5054 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION; 5055 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5056 nvme_write_boot_partition_cb, ctrlr); 5057 if (res) { 5058 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5059 NVME_CTRLR_ERRLOG(ctrlr, "commit 
action: %d\n", fw_commit.ca); 5060 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5061 return; 5062 } 5063 5064 ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE; 5065 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) { 5066 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n"); 5067 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); 5068 fw_commit.bpid = ctrlr->bpid; 5069 fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION; 5070 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, 5071 nvme_write_boot_partition_cb, ctrlr); 5072 if (res) { 5073 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n"); 5074 NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca); 5075 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5076 return; 5077 } 5078 5079 ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE; 5080 } else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) { 5081 NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n"); 5082 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl); 5083 } else { 5084 NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n"); 5085 ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl); 5086 return; 5087 } 5088 } 5089 5090 int spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr, 5091 void *payload, uint32_t size, uint32_t bpid, 5092 spdk_nvme_cmd_cb cb_fn, void *cb_arg) 5093 { 5094 int res; 5095 5096 if (ctrlr->cap.bits.bps == 0) { 5097 return -ENOTSUP; 5098 } 5099 5100 ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING; 5101 ctrlr->bpid = bpid; 5102 ctrlr->bp_write_cb_fn = cb_fn; 5103 ctrlr->bp_write_cb_arg = cb_arg; 5104 ctrlr->fw_offset = 0; 5105 ctrlr->fw_size_remaining = size; 5106 ctrlr->fw_payload = payload; 5107 ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size); 5108 5109 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset, 5110 ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr); 5111 5112 return res; 5113 } 5114 5115 bool 5116 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr) 5117 { 5118 assert(ctrlr); 5119 5120 return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN, 5121 strlen(SPDK_NVMF_DISCOVERY_NQN)); 5122 } 5123 5124 bool 5125 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr) 5126 { 5127 assert(ctrlr); 5128 5129 return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype); 5130 } 5131 5132 int 5133 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5134 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5135 { 5136 struct nvme_completion_poll_status *status; 5137 int res; 5138 5139 status = calloc(1, sizeof(*status)); 5140 if (!status) { 5141 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5142 return -ENOMEM; 5143 } 5144 5145 res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size, 5146 nvme_completion_poll_cb, status); 5147 if (res) { 5148 free(status); 5149 return res; 5150 } 5151 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5152 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n"); 5153 if (!status->timed_out) { 5154 free(status); 5155 } 5156 return -ENXIO; 5157 } 5158 free(status); 5159 5160 return 0; 5161 } 5162 5163 int 5164 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, 5165 uint16_t spsp, uint8_t nssf, void *payload, size_t size) 5166 { 5167 struct nvme_completion_poll_status *status; 5168 int res; 5169 5170 status = calloc(1, 
sizeof(*status)); 5171 if (!status) { 5172 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 5173 return -ENOMEM; 5174 } 5175 5176 res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, 5177 nvme_completion_poll_cb, 5178 status); 5179 if (res) { 5180 free(status); 5181 return res; 5182 } 5183 if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) { 5184 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n"); 5185 if (!status->timed_out) { 5186 free(status); 5187 } 5188 return -ENXIO; 5189 } 5190 5191 free(status); 5192 5193 return 0; 5194 } 5195 5196 uint64_t 5197 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr) 5198 { 5199 return ctrlr->flags; 5200 } 5201 5202 const struct spdk_nvme_transport_id * 5203 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr) 5204 { 5205 return &ctrlr->trid; 5206 } 5207 5208 int32_t 5209 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr) 5210 { 5211 uint32_t qid; 5212 5213 assert(ctrlr->free_io_qids); 5214 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5215 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); 5216 if (qid > ctrlr->opts.num_io_queues) { 5217 NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n"); 5218 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5219 return -1; 5220 } 5221 5222 spdk_bit_array_clear(ctrlr->free_io_qids, qid); 5223 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5224 return qid; 5225 } 5226 5227 void 5228 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid) 5229 { 5230 assert(qid <= ctrlr->opts.num_io_queues); 5231 5232 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 5233 5234 if (spdk_likely(ctrlr->free_io_qids)) { 5235 spdk_bit_array_set(ctrlr->free_io_qids, qid); 5236 } 5237 5238 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 5239 } 5240 5241 int 5242 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, 5243 struct spdk_memory_domain **domains, int array_size) 5244 { 5245 return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); 5246 } 5247
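
/*
 * Illustrative sketch, not part of the driver: two hypothetical helpers showing
 * how an application might consume the public accessors defined above. The
 * function names and the use of printf()/usleep() are examples only, not SPDK APIs.
 */

/* Walk the active namespace list of an attached controller. */
void
example_print_active_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nsid;

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
	     nsid != 0;
	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

		if (ns == NULL) {
			continue;
		}

		printf("Namespace %u: %llu sectors of %u bytes\n", nsid,
		       (unsigned long long)spdk_nvme_ns_get_num_sectors(ns),
		       spdk_nvme_ns_get_sector_size(ns));
	}
}

/*
 * Service the admin queue periodically so that, when a keep-alive interval is
 * configured, nvme_ctrlr_keep_alive() gets a chance to submit the Keep Alive
 * command and spdk_nvme_ctrlr_process_admin_completions() can reap it.
 */
void
example_admin_poll_loop(struct spdk_nvme_ctrlr *ctrlr, volatile bool *shutdown_requested)
{
	while (!*shutdown_requested) {
		if (spdk_nvme_ctrlr_process_admin_completions(ctrlr) < 0) {
			/* A negative return indicates a failed controller or transport. */
			break;
		}

		/* Sleep briefly to avoid spinning a full core on an idle admin queue. */
		usleep(1000);
	}
}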