/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvme_internal.h"
#include "nvme_io_msg.h"

#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/endian.h"

struct nvme_active_ns_ctx;

static void nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr);
static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
		struct nvme_async_event_request *aer);
static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns);
static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr);

#define CTRLR_STRING(ctrlr) \
	((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \
	ctrlr->trid.subnqn : ctrlr->trid.traddr)

#define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
	SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
	SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
	SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
	SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);

#ifdef DEBUG
#define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
	SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
#else
#define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
#endif

static int
nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
			&cc->raw);
}

static int
nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
			&csts->raw);
}

int
nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
{
	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
			&cap->raw);
}

int
nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
			&vs->raw);
}

static int
nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
{
	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
			cc->raw);
}

int
nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
			&cmbsz->raw);
}

int
nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap)
{
	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
			&pmrcap->raw);
}

static int
nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value)
{
	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr),
			nssr_value);
}

bool
nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS &&
	       ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS;
}

/* When the fields in spdk_nvme_ctrlr_opts are changed and you change this function, please
 * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c
 */
void
spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	char host_id_str[SPDK_UUID_STRING_LEN];

	assert(opts);

	opts->opts_size = opts_size;

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \
		opts->field = value; \
	} \

	SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES);
	SET_FIELD(use_cmb_sqs, true);
	SET_FIELD(no_shn_notification, false);
	SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR);
	SET_FIELD(arbitration_burst, 0);
	SET_FIELD(low_priority_weight, 0);
	SET_FIELD(medium_priority_weight, 0);
	SET_FIELD(high_priority_weight, 0);
	SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS);
	SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT);
	SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE);

	if (nvme_driver_init() == 0) {
		if (FIELD_OK(hostnqn)) {
			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
					    &g_spdk_nvme_driver->default_extended_host_id);
			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
		}

		if (FIELD_OK(extended_host_id)) {
			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
			       sizeof(opts->extended_host_id));
		}

	}

	SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS);

	if (FIELD_OK(src_addr)) {
		memset(opts->src_addr, 0, sizeof(opts->src_addr));
	}

	if (FIELD_OK(src_svcid)) {
		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
	}

	if (FIELD_OK(host_id)) {
		memset(opts->host_id, 0, sizeof(opts->host_id));
	}

	SET_FIELD(command_set, CHAR_BIT);
	SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000);
	SET_FIELD(header_digest, false);
	SET_FIELD(data_digest, false);
	SET_FIELD(disable_error_logging, false);
	SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT);
	SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE);
	SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT);

#undef FIELD_OK
#undef SET_FIELD
}
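
/*
 * Illustrative usage sketch (not part of the driver, added for clarity): a caller
 * passes sizeof() of the spdk_nvme_ctrlr_opts it was compiled against, so the
 * FIELD_OK()/SET_FIELD() checks above only touch fields that exist in that
 * (possibly older) layout. The option values below are arbitrary examples, and
 * trid is assumed to be a transport ID the caller has already filled in.
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.num_io_queues = 4;
 *	opts.keep_alive_timeout_ms = 10 * 1000;
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */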

/**
 * This function will be called when the process allocates the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
		qpair->active_proc = active_proc;
	}
}

/**
 * This function will be called when the process frees the IO qpair.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr_process *active_proc;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_qpair *active_qpair, *tmp_qpair;

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (!active_proc) {
		return;
	}

	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
			   per_process_tailq, tmp_qpair) {
		if (active_qpair == qpair) {
			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
				     active_qpair, per_process_tailq);

			break;
		}
	}
}

void
spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_io_qpair_opts *opts,
		size_t opts_size)
{
	assert(ctrlr);

	assert(opts);

	memset(opts, 0, opts_size);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size

	if (FIELD_OK(qprio)) {
		opts->qprio = SPDK_NVME_QPRIO_URGENT;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = ctrlr->opts.io_queue_size;
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
	}

	if (FIELD_OK(delay_cmd_submit)) {
		opts->delay_cmd_submit = false;
	}

	if (FIELD_OK(sq.vaddr)) {
		opts->sq.vaddr = NULL;
	}

	if (FIELD_OK(sq.paddr)) {
		opts->sq.paddr = 0;
	}

	if (FIELD_OK(sq.buffer_size)) {
		opts->sq.buffer_size = 0;
	}

	if (FIELD_OK(cq.vaddr)) {
		opts->cq.vaddr = NULL;
	}

	if (FIELD_OK(cq.paddr)) {
		opts->cq.paddr = 0;
	}

	if (FIELD_OK(cq.buffer_size)) {
		opts->cq.buffer_size = 0;
	}

	if (FIELD_OK(create_only)) {
		opts->create_only = false;
	}

#undef FIELD_OK
}

static struct spdk_nvme_qpair *
nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			   const struct spdk_nvme_io_qpair_opts *opts)
{
	int32_t qid;
	struct spdk_nvme_qpair *qpair;
	union spdk_nvme_cc_register cc;

	if (!ctrlr) {
		return NULL;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "get_cc failed\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	/*
	 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
	 * default round robin arbitration method.
	 */
	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
		NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
	if (qid < 0) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
	if (qpair == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n");
		spdk_nvme_ctrlr_free_qid(ctrlr, qid);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

	nvme_ctrlr_proc_add_io_qpair(qpair);

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return qpair;
}

int
spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc;

	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
		return -EISCONN;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
		spdk_delay_us(100);
	}

	return rc;
}

void
spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

struct spdk_nvme_qpair *
spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			       const struct spdk_nvme_io_qpair_opts *user_opts,
			       size_t opts_size)
{

	struct spdk_nvme_qpair *qpair;
	struct spdk_nvme_io_qpair_opts opts;
	int rc;

	/*
	 * Get the default options, then overwrite them with the user-provided options
	 * up to opts_size.
	 *
	 * This allows for extensions of the opts structure without breaking
	 * ABI compatibility.
	 */
	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	if (user_opts) {
		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));

		/* If user passes buffers, make sure they're big enough for the requested queue size */
		if (opts.sq.vaddr) {
			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
				NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n",
						  opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
				return NULL;
			}
		}
		if (opts.cq.vaddr) {
			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
				NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n",
						  opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
				return NULL;
			}
		}
	}

	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);

	if (qpair == NULL || opts.create_only == true) {
		return qpair;
	}

	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n");
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_proc_remove_io_qpair(qpair);
		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
		spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	return qpair;
}

int
spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;
	enum nvme_qpair_state qpair_state;
	int rc;

	assert(qpair != NULL);
	assert(nvme_qpair_is_admin_queue(qpair) == false);
	assert(qpair->ctrlr != NULL);

	ctrlr = qpair->ctrlr;
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	qpair_state = nvme_qpair_get_state(qpair);

	if (ctrlr->is_removed) {
		rc = -ENODEV;
		goto out;
	}

	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
		rc = -EAGAIN;
		goto out;
	}

	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
		rc = -ENXIO;
		goto out;
	}

	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
		rc = 0;
		goto out;
	}

	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
	if (rc) {
		rc = -EAGAIN;
		goto out;
	}

out:
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return rc;
}

spdk_nvme_qp_failure_reason
spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->adminq->transport_failure_reason;
}
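
/*
 * Illustrative usage sketch (not part of the driver): allocating an I/O qpair with
 * modified options, using the same opts_size-based compatibility scheme as for the
 * controller options. With create_only = true the qpair is only created here and
 * must be connected explicitly with spdk_nvme_ctrlr_connect_io_qpair(). Variable
 * names are examples only.
 *
 *	struct spdk_nvme_io_qpair_opts qp_opts;
 *	struct spdk_nvme_qpair *qpair;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &qp_opts, sizeof(qp_opts));
 *	qp_opts.io_queue_size = 256;
 *	qp_opts.create_only = true;
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &qp_opts, sizeof(qp_opts));
 *	if (qpair != NULL && spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair) != 0) {
 *		spdk_nvme_ctrlr_free_io_qpair(qpair);
 *	}
 */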

/*
 * This internal function will attempt to take the controller
 * lock before calling disconnect on a controller qpair.
 * Functions already holding the controller lock should
 * call nvme_transport_ctrlr_disconnect_qpair directly.
 */
void
nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	assert(ctrlr != NULL);
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;

	if (qpair == NULL) {
		return 0;
	}

	ctrlr = qpair->ctrlr;

	if (qpair->in_completion_context) {
		/*
		 * There are many cases where it is convenient to delete an io qpair in the context
		 * of that qpair's completion routine. To handle this properly, set a flag here
		 * so that the completion routine will perform an actual delete after the context
		 * unwinds.
		 */
		qpair->delete_after_completion_context = 1;
		return 0;
	}

	if (qpair->poll_group && qpair->poll_group->in_completion_context) {
		/* Same as above, but in a poll group. */
		qpair->poll_group->num_qpairs_to_delete++;
		qpair->delete_after_completion_context = 1;
		return 0;
	}

	if (qpair->poll_group) {
		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
	}

	/* Do not retry. */
	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);

	/* In the multi-process case, a process may call this function on a foreign
	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
	 * exits unexpectedly. In that case, we must not try to abort any reqs associated
	 * with that qpair, since the callbacks will also be foreign to this process.
	 */
	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
		nvme_qpair_abort_reqs(qpair, 1);
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	nvme_ctrlr_proc_remove_io_qpair(qpair);

	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
	spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id);

	if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return -1;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}
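
/*
 * Illustrative sketch (not part of the driver): as the comments above describe,
 * spdk_nvme_ctrlr_free_io_qpair() may be called from an I/O completion callback;
 * the in_completion_context/delete_after_completion_context flags defer the actual
 * teardown until spdk_nvme_qpair_process_completions() unwinds. The callback below
 * is a hypothetical application function, not an SPDK API.
 *
 *	static void
 *	example_io_done(void *cb_arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		struct spdk_nvme_qpair *qpair = cb_arg;
 *
 *		// Safe here: the free is deferred until the completion context unwinds.
 *		spdk_nvme_ctrlr_free_io_qpair(qpair);
 *	}
 */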

static void
nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_intel_log_page_directory *log_page_directory)
{
	if (log_page_directory == NULL) {
		return;
	}

	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
		return;
	}

	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;

	if (log_page_directory->read_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
	}
	if (log_page_directory->write_latency_log_len ||
	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
	}
	if (log_page_directory->temperature_statistics_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
	}
	if (log_page_directory->smart_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
	}
	if (log_page_directory->marketing_description_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
	}
}

static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	struct nvme_completion_poll_status *status;
	struct spdk_nvme_intel_log_page_directory *log_page_directory;

	log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
					  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (log_page_directory == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "could not allocate log_page_directory\n");
		return -ENXIO;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		spdk_free(log_page_directory);
		return -ENOMEM;
	}

	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
					      SPDK_NVME_GLOBAL_NS_TAG, log_page_directory,
					      sizeof(struct spdk_nvme_intel_log_page_directory),
					      0, nvme_completion_poll_cb, status);
	if (rc != 0) {
		spdk_free(log_page_directory);
		free(status);
		return rc;
	}

	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
					     ctrlr->opts.admin_timeout_ms * 1000)) {
		spdk_free(log_page_directory);
		NVME_CTRLR_WARNLOG(ctrlr, "Intel log pages not supported on Intel drive!\n");
		if (!status->timed_out) {
			free(status);
		}
		return 0;
	}

	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
	spdk_free(log_page_directory);
	free(status);
	return 0;
}

static int
nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status *status;
	int rc;

	status = calloc(1, sizeof(*status));
	if (status == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
					      SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page,
					      ctrlr->ana_log_page_size, 0,
					      nvme_completion_poll_cb, status);
	if (rc != 0) {
		free(status);
		return rc;
	}

	if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock,
			ctrlr->opts.admin_timeout_ms * 1000)) {
		if (!status->timed_out) {
			free(status);
		}
		return -EIO;
	}

	free(status);
	return 0;
}

static int
nvme_ctrlr_init_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t ana_log_page_size;

	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid *
			    sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->cdata.nn *
			    sizeof(uint32_t);

	ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
					   SPDK_MALLOC_DMA);
	if (ctrlr->ana_log_page == NULL) {
		NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer\n");
		return -ENXIO;
	}
	ctrlr->ana_log_page_size = ana_log_page_size;

	ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true;

	return nvme_ctrlr_update_ana_log_page(ctrlr);
}

static int
nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
				void *cb_arg)
{
	struct spdk_nvme_ctrlr *ctrlr = cb_arg;
	struct spdk_nvme_ns *ns;
	uint32_t i, nsid;

	for (i = 0; i < desc->num_of_nsid; i++) {
		nsid = desc->nsid[i];
		if (nsid == 0 || nsid > ctrlr->cdata.nn) {
			continue;
		}

		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		assert(ns != NULL);

		ns->ana_group_id = desc->ana_group_id;
		ns->ana_state = desc->ana_state;
	}

	return 0;
}

int
nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr,
			      spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ana_group_descriptor *desc;
	uint32_t i;
	int rc = 0;

	if (ctrlr->ana_log_page == NULL) {
		return -EINVAL;
	}

	desc = (void *)((uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page));

	for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) {
		rc = cb_fn(desc, cb_arg);
		if (rc != 0) {
			break;
		}
		desc = (void *)((uint8_t *)desc + sizeof(struct spdk_nvme_ana_group_descriptor) +
				desc->num_of_nsid * sizeof(uint32_t));
	}

	return rc;
}

static int
nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;

	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
	/* Mandatory pages */
	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
	if (ctrlr->cdata.lpa.celp) {
		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
		if (rc != 0) {
			goto out;
		}
	}
	if (ctrlr->cdata.cmic.ana_reporting) {
		rc = nvme_ctrlr_init_ana_log_page(ctrlr);
		if (rc == 0) {
			nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
						      ctrlr);
		}
	} else {
		uint32_t i;

		for (i = 0; i < ctrlr->num_ns; i++) {
			ctrlr->ns[i].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
		}
	}

out:
	return rc;
}

static void
nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

static void
nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t cdw11;
	struct nvme_completion_poll_status *status;

	if (ctrlr->opts.arbitration_burst == 0) {
		return;
	}

	if (ctrlr->opts.arbitration_burst > 7) {
		NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values are from 0-7\n");
		return;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return;
	}

	cdw11 = ctrlr->opts.arbitration_burst;

	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
	}

	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
					    cdw11, 0, NULL, 0,
					    nvme_completion_poll_cb, status) < 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n");
		free(status);
		return;
	}

	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
					     ctrlr->opts.admin_timeout_ms * 1000)) {
		NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n");
	}

	if (!status->timed_out) {
		free(status);
	}
}

static void
nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
	/* Mandatory features */
	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
	/* Optional features */
	if (ctrlr->cdata.vwc.present) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	}
	if (ctrlr->cdata.apsta.supported) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
	}
	if (ctrlr->cdata.hmpre) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_supported_features(ctrlr);
	}

	nvme_ctrlr_set_arbitration_feature(ctrlr);
}

bool
spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->is_failed;
}

void
nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
{
	/*
	 * Set the flag here and leave the failing of the qpairs to
	 * spdk_nvme_qpair_process_completions().
	 */
	if (hot_remove) {
		ctrlr->is_removed = true;
	}

	if (ctrlr->is_failed) {
		NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n");
		return;
	}

	ctrlr->is_failed = true;
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
	NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n");
}

/**
 * This public API function will try to take the controller lock.
 * Any private functions being called from a thread already holding
 * the ctrlr lock should call nvme_ctrlr_fail directly.
 */
void
spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_ctrlr_fail(ctrlr, false);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

static void
nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
			  struct nvme_ctrlr_detach_ctx *ctx)
{
	union spdk_nvme_cc_register cc;

	if (ctrlr->is_removed) {
		ctx->shutdown_complete = true;
		return;
	}

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
		ctx->shutdown_complete = true;
		return;
	}

	cc.bits.shn = SPDK_NVME_SHN_NORMAL;

	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
		ctx->shutdown_complete = true;
		return;
	}

	/*
	 * The NVMe specification defines RTD3E to be the time between
	 * setting SHN = 1 until the controller will set SHST = 10b.
	 * If the device doesn't report RTD3 entry latency, or if it
	 * reports RTD3 entry latency less than 10 seconds, pick
	 * 10 seconds as a reasonable amount of time to
	 * wait before proceeding.
	 */
	NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
	ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
	ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
	NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms);

	ctx->shutdown_start_tsc = spdk_get_ticks();
}

static int
nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
			       struct nvme_ctrlr_detach_ctx *ctx)
{
	union spdk_nvme_csts_register csts;
	uint32_t ms_waited;

	ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();

	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
		NVME_CTRLR_ERRLOG(ctrlr, "get_csts() failed\n");
		return -EIO;
	}

	if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited);
		return 0;
	}

	if (ms_waited < ctx->shutdown_timeout_ms) {
		return -EAGAIN;
	}

	NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n",
			  ctx->shutdown_timeout_ms);
	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
	}

	return 0;
}

static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	int rc;

	rc = nvme_transport_ctrlr_enable(ctrlr);
	if (rc != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n");
		return rc;
	}

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
		return -EIO;
	}

	if (cc.bits.en != 0) {
		NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n");
		return -EINVAL;
	}

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/* Page size is 2 ^ (12 + mps). */
	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;

	/*
	 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS.
	 * A controller that does not have any bit set in CAP.CSS is not spec compliant.
	 * Try to support such a controller regardless.
	 */
	if (ctrlr->cap.bits.css == 0) {
		NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n");
		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
	}

	/*
	 * If the user did not explicitly request a command set, or supplied a value larger than
	 * what can be saved in CC.CSS, use the most reasonable default.
	 */
	if (ctrlr->opts.command_set >= CHAR_BIT) {
		if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS;
		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) {
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO;
		} else {
			/* Invalid supported bits detected, falling back to NVM.
			 */
			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
		}
	}

	/* Verify that the selected command set is supported by the controller. */
	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
		NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n",
				    ctrlr->opts.command_set, ctrlr->cap.bits.css);
		NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n");
		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
	}

	cc.bits.css = ctrlr->opts.command_set;

	switch (ctrlr->opts.arb_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
		break;
	case SPDK_NVME_CC_AMS_WRR:
		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	case SPDK_NVME_CC_AMS_VS:
		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
			break;
		}
		return -EINVAL;
	default:
		return -EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;

	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
		return -EIO;
	}

	return 0;
}

static int
nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;

	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
		return -EIO;
	}

	if (cc.bits.en == 0) {
		return 0;
	}

	cc.bits.en = 0;

	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
		return -EIO;
	}

	return 0;
}

#ifdef DEBUG
static const char *
nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
{
	switch (state) {
	case NVME_CTRLR_STATE_INIT_DELAY:
		return "delay init";
	case NVME_CTRLR_STATE_CONNECT_ADMINQ:
		return "connect adminq";
	case NVME_CTRLR_STATE_READ_VS:
		return "read vs";
	case NVME_CTRLR_STATE_READ_CAP:
		return "read cap";
	case NVME_CTRLR_STATE_CHECK_EN:
		return "check en";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		return "disable and wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		return "disable and wait for CSTS.RDY = 0";
	case NVME_CTRLR_STATE_ENABLE:
		return "enable controller by writing CC.EN = 1";
	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		return "wait for CSTS.RDY = 1";
	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
		return "reset admin queue";
	case NVME_CTRLR_STATE_IDENTIFY:
		return "identify controller";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
		return "wait for identify controller";
	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
		return "identify controller iocs specific";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
		return "wait for identify controller iocs specific";
	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
		return "get zns cmd and effects log page";
	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
		return "wait for get zns cmd and effects log page";
	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
		return "set number of queues";
	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
		return "wait for set number of queues";
	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
		return "identify active ns";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
		return "wait for identify active ns";
	case NVME_CTRLR_STATE_CONSTRUCT_NS:
		return "construct namespaces";
	case NVME_CTRLR_STATE_IDENTIFY_NS:
		return "identify ns";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
		return "wait for identify ns";
	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
		return "identify namespace id descriptors";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
		return "wait for identify namespace id descriptors";
	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
		return "identify ns iocs specific";
	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
		return "wait for identify ns iocs specific";
	case NVME_CTRLR_STATE_CONFIGURE_AER:
		return "configure AER";
	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
		return "wait for configure aer";
	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
		return "set supported log pages";
	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
		return "set supported features";
	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
		return "set doorbell buffer config";
	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
		return "wait for doorbell buffer config";
	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
		return "set keep alive timeout";
	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
		return "wait for set keep alive timeout";
	case NVME_CTRLR_STATE_SET_HOST_ID:
		return "set host ID";
	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
		return "wait for set host ID";
	case NVME_CTRLR_STATE_READY:
		return "ready";
	case NVME_CTRLR_STATE_ERROR:
		return "error";
	}
	return "unknown";
};
#endif /* DEBUG */

static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		     uint64_t timeout_in_ms)
{
	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;

	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
		goto inf;
	}

	ticks_per_ms = spdk_get_ticks_hz() / 1000;
	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
		NVME_CTRLR_ERRLOG(ctrlr,
				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
		goto inf;
	}

	now_ticks = spdk_get_ticks();
	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
		NVME_CTRLR_ERRLOG(ctrlr,
				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
		goto inf;
	}

	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
	NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n",
			    nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
	return;
inf:
	NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n",
			    nvme_ctrlr_state_string(ctrlr->state));
	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
}

static void
nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr)
{
	spdk_free(ctrlr->cdata_zns);
	ctrlr->cdata_zns = NULL;
}

static void
nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_ctrlr_free_zns_specific_data(ctrlr);
}

static void
nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr->shadow_doorbell) {
		spdk_free(ctrlr->shadow_doorbell);
		ctrlr->shadow_doorbell = NULL;
	}

	if (ctrlr->eventidx) {
		spdk_free(ctrlr->eventidx);
		ctrlr->eventidx = NULL;
	}
}

static void
nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n");
	} else {
		NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n");
	}
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
			     ctrlr->opts.admin_timeout_ms);
}

static int
nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	uint64_t prp1, prp2, len;

	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	/* only 1 page size for doorbell buffer */
	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
					      NULL, SPDK_ENV_LCORE_ID_ANY,
					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
	if (ctrlr->shadow_doorbell == NULL) {
		rc = -ENOMEM;
		goto error;
	}

	len = ctrlr->page_size;
	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
		rc = -EFAULT;
		goto error;
	}

	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
				       NULL, SPDK_ENV_LCORE_ID_ANY,
				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
	if (ctrlr->eventidx == NULL) {
		rc = -ENOMEM;
		goto error;
	}

	len = ctrlr->page_size;
	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
		rc = -EFAULT;
		goto error;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
			     ctrlr->opts.admin_timeout_ms);

	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
	if (rc != 0) {
		goto error;
	}

	return 0;

error:
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
	nvme_ctrlr_free_doorbell_buffer(ctrlr);
	return rc;
}

static void
nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_request *req, *tmp;
	struct spdk_nvme_cpl cpl = {};

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);

		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
		nvme_free_request(req);
	}
}

int
spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0, rc_tmp = 0;
	struct spdk_nvme_qpair *qpair;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	if (ctrlr->is_resetting || ctrlr->is_removed) {
		/*
		 * Controller is already resetting or has been removed. Return
		 * immediately since there is no need to kick off another
		 * reset in these cases.
		 */
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return ctrlr->is_resetting ? 0 : -ENXIO;
	}

	ctrlr->is_resetting = true;
	ctrlr->is_failed = false;

	NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n");

	/* Abort all of the queued abort requests */
	nvme_ctrlr_abort_queued_aborts(ctrlr);

	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);

	/* Disable all queues before disabling the controller hardware. */
	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
	}

	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);

	/* Doorbell buffer config is invalid during reset */
	nvme_ctrlr_free_doorbell_buffer(ctrlr);

	/* I/O Command Set Specific Identify Controller data is invalidated during reset */
	nvme_ctrlr_free_iocs_specific_data(ctrlr);

	spdk_bit_array_free(&ctrlr->free_io_qids);

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_process_init(ctrlr) != 0) {
			NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n");
			rc = -1;
			break;
		}
	}

	/*
	 * For non-fabrics controllers, the memory locations of the transport qpair
	 * don't change when the controller is reset. They simply need to be
	 * re-enabled with admin commands to the controller. For fabric
	 * controllers we need to disconnect and reconnect the qpair on its
	 * own thread outside of the context of the reset.
	 */
	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);
			rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
			if (rc_tmp != 0) {
				rc = rc_tmp;
				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
				continue;
			}
		}
	}

	if (rc) {
		nvme_ctrlr_fail(ctrlr, false);
	}
	ctrlr->is_resetting = false;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
		/*
		 * If controller doesn't support ns_attribute_notices and
		 * namespace attributes change (e.g. number of namespaces)
		 * we need to update system handling device reset.
		 */
		nvme_io_msg_ctrlr_update(ctrlr);
	}

	return rc;
}

int
spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cap_register cap;
	int rc = 0;

	cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr);
	if (cap.bits.nssrs == 0) {
		NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n");
		return -ENOTSUP;
	}

	NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n");
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	ctrlr->is_resetting = true;
	rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE);
	ctrlr->is_resetting = false;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	/*
	 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause
	 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup.
	 */
	return rc;
}

int
spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid)
{
	int rc = 0;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	if (ctrlr->is_failed == false) {
		rc = -EPERM;
		goto out;
	}

	if (trid->trtype != ctrlr->trid.trtype) {
		rc = -EINVAL;
		goto out;
	}

	if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
		rc = -EINVAL;
		goto out;
	}

	ctrlr->trid = *trid;

out:
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return rc;
}

void
spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr,
			      spdk_nvme_remove_cb remove_cb, void *remove_ctx)
{
	if (!spdk_process_is_primary()) {
		return;
	}

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	ctrlr->remove_cb = remove_cb;
	ctrlr->cb_ctx = remove_ctx;
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
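
/*
 * Illustrative sketch (not part of the driver): as the comment inside
 * spdk_nvme_ctrlr_reset() notes, fabrics I/O qpairs are not reconnected by the
 * reset itself; an application is expected to reconnect them afterwards, e.g. on
 * the thread that owns each qpair. Variable names are examples only.
 *
 *	if (spdk_nvme_ctrlr_reset(ctrlr) == 0 && spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
 *		// On the thread owning 'qpair':
 *		rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
 *		// -EAGAIN means the controller is still resetting; retry later.
 *	}
 */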

static void
nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n");
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
		return;
	}

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 * controller supports.
	 */
	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
	NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
	if (ctrlr->cdata.mdts > 0) {
		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
						ctrlr->min_page_size * (1 << ctrlr->cdata.mdts));
		NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
	}

	NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		ctrlr->cntlid = ctrlr->cdata.cntlid;
	} else {
		/*
		 * Fabrics controllers should already have CNTLID from the Connect command.
		 *
		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
		 * trust the one from Connect.
		 */
		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
			NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
					    ctrlr->cdata.cntlid, ctrlr->cntlid);
		}
	}

	if (ctrlr->cdata.sgls.supported) {
		assert(ctrlr->cdata.sgls.supported != 0x3);
		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
		if (ctrlr->cdata.sgls.supported == 0x2) {
			ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT;
		}
		/*
		 * Use MSDBD to ensure our max_sges doesn't exceed what the
		 * controller supports.
		 */
		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
		if (ctrlr->cdata.nvmf_specific.msdbd != 0) {
			ctrlr->max_sges = spdk_min(ctrlr->cdata.nvmf_specific.msdbd, ctrlr->max_sges);
		} else {
			/* A value 0 indicates no limit. */
		}
		NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges);
	}

	if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) {
		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
	}

	if (ctrlr->cdata.oacs.directives) {
		ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED;
	}

	NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n",
			    ctrlr->cdata.fuses.compare_and_write);
	if (ctrlr->cdata.fuses.compare_and_write) {
		ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
			     ctrlr->opts.admin_timeout_ms);
}

static int
nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
			     ctrlr->opts.admin_timeout_ms);

	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0,
				     &ctrlr->cdata, sizeof(ctrlr->cdata),
				     nvme_ctrlr_identify_done, ctrlr);
	if (rc != 0) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
		return rc;
	}

	return 0;
}

static void
nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_cmds_and_effect_log_page *log_page;
	struct spdk_nvme_ctrlr *ctrlr = arg;

	if (spdk_nvme_cpl_is_error(cpl)) {
		NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n");
		spdk_free(ctrlr->tmp_ptr);
		ctrlr->tmp_ptr = NULL;
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
		return;
	}

	log_page = ctrlr->tmp_ptr;

	if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) {
		ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	}
	spdk_free(ctrlr->tmp_ptr);
	ctrlr->tmp_ptr = NULL;

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms);
}

static int
nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	assert(!ctrlr->tmp_ptr);
	ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL,
				      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
	if (!ctrlr->tmp_ptr) {
		rc = -ENOMEM;
		goto error;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG,
			     ctrlr->opts.admin_timeout_ms);

	rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG,
			0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page),
			0, 0, 0, SPDK_NVME_CSI_ZNS << 24,
			nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr);
	if (rc != 0) {
		goto error;
	}

	return 0;

error:
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
	spdk_free(ctrlr->tmp_ptr);
	ctrlr->tmp_ptr = NULL;
	return rc;
}

static void
nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;

	if (spdk_nvme_cpl_is_error(cpl)) {
		/* no need to print an error, the controller simply does not support ZNS */
		nvme_ctrlr_free_zns_specific_data(ctrlr);
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
				     ctrlr->opts.admin_timeout_ms);
		return;
	}

	/* A zero zasl value means use mdts */
	if (ctrlr->cdata_zns->zasl) {
		uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl);
		ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append);
	} else {
		ctrlr->max_zone_append_size = ctrlr->max_xfer_size;
	}

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG,
			     ctrlr->opts.admin_timeout_ms);
}

/**
 * This function will try to fetch the I/O Command Specific Controller data structure for
 * each I/O Command Set supported by SPDK.
 *
 * If an I/O Command Set is not supported by the controller, "Invalid Field in Command"
 * will be returned. Since we are fetching in an exploratory way, getting an error back
 * from the controller should not be treated as fatal.
 *
 * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set).
 *
 * I/O Command Sets without an IOCS specific data structure (i.e. a zero-filled IOCS specific
 * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set).
 */
static int
nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
				     ctrlr->opts.admin_timeout_ms);
		return 0;
	}

	/*
	 * Since SPDK currently only needs to fetch a single Command Set, keep the code here,
	 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates,
	 * which would require additional functions and complexity for no good reason.
1756 */ 1757 assert(!ctrlr->cdata_zns); 1758 ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 1759 SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); 1760 if (!ctrlr->cdata_zns) { 1761 rc = -ENOMEM; 1762 goto error; 1763 } 1764 1765 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC, 1766 ctrlr->opts.admin_timeout_ms); 1767 1768 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS, 1769 ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns), 1770 nvme_ctrlr_identify_zns_specific_done, ctrlr); 1771 if (rc != 0) { 1772 goto error; 1773 } 1774 1775 return 0; 1776 1777 error: 1778 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1779 nvme_ctrlr_free_zns_specific_data(ctrlr); 1780 return rc; 1781 } 1782 1783 enum nvme_active_ns_state { 1784 NVME_ACTIVE_NS_STATE_IDLE, 1785 NVME_ACTIVE_NS_STATE_PROCESSING, 1786 NVME_ACTIVE_NS_STATE_DONE, 1787 NVME_ACTIVE_NS_STATE_ERROR 1788 }; 1789 1790 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *); 1791 1792 struct nvme_active_ns_ctx { 1793 struct spdk_nvme_ctrlr *ctrlr; 1794 uint32_t page; 1795 uint32_t next_nsid; 1796 uint32_t *new_ns_list; 1797 nvme_active_ns_ctx_deleter deleter; 1798 1799 enum nvme_active_ns_state state; 1800 }; 1801 1802 static struct nvme_active_ns_ctx * 1803 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter) 1804 { 1805 struct nvme_active_ns_ctx *ctx; 1806 uint32_t *new_ns_list = NULL; 1807 1808 ctx = calloc(1, sizeof(*ctx)); 1809 if (!ctx) { 1810 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n"); 1811 return NULL; 1812 } 1813 1814 new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, 1815 NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE); 1816 if (!new_ns_list) { 1817 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n"); 1818 free(ctx); 1819 return NULL; 1820 } 1821 1822 ctx->new_ns_list = new_ns_list; 1823 ctx->ctrlr = ctrlr; 1824 ctx->deleter = deleter; 1825 1826 return ctx; 1827 } 1828 1829 static void 1830 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx) 1831 { 1832 spdk_free(ctx->new_ns_list); 1833 free(ctx); 1834 } 1835 1836 static void 1837 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t **new_ns_list) 1838 { 1839 uint32_t max_active_ns_idx = 0; 1840 1841 while ((*new_ns_list)[max_active_ns_idx++]); 1842 spdk_free(ctrlr->active_ns_list); 1843 ctrlr->active_ns_list = *new_ns_list; 1844 ctrlr->max_active_ns_idx = max_active_ns_idx; 1845 *new_ns_list = NULL; 1846 } 1847 1848 static void 1849 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 1850 { 1851 struct nvme_active_ns_ctx *ctx = arg; 1852 uint32_t *new_ns_list = NULL; 1853 1854 if (spdk_nvme_cpl_is_error(cpl)) { 1855 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 1856 goto out; 1857 } 1858 1859 ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page + 1023]; 1860 if (ctx->next_nsid == 0) { 1861 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 1862 goto out; 1863 } 1864 1865 ctx->page++; 1866 new_ns_list = spdk_realloc(ctx->new_ns_list, 1867 (ctx->page + 1) * sizeof(struct spdk_nvme_ns_list), 1868 ctx->ctrlr->page_size); 1869 if (!new_ns_list) { 1870 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 1871 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 1872 goto out; 1873 } 1874 1875 ctx->new_ns_list = new_ns_list; 1876 nvme_ctrlr_identify_active_ns_async(ctx); 1877 return; 1878 1879 out: 1880 if 
(ctx->deleter) { 1881 ctx->deleter(ctx); 1882 } 1883 } 1884 1885 static void 1886 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx) 1887 { 1888 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1889 uint32_t i; 1890 int rc; 1891 1892 if (ctrlr->cdata.nn == 0) { 1893 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 1894 goto out; 1895 } 1896 1897 assert(ctx->new_ns_list != NULL); 1898 1899 /* 1900 * If the controller doesn't support the active ns list (CNS 0x02), dummy up 1901 * an active ns list, i.e. report all namespaces as active. 1902 */ 1903 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) { 1904 uint32_t *new_ns_list; 1905 uint32_t num_pages; 1906 1907 /* 1908 * The active NS list must always end with a zero element, 1909 * so allocate room for cdata.nn + 1 entries. 1910 */ 1911 num_pages = spdk_divide_round_up(ctrlr->cdata.nn + 1, 1912 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0])); 1913 new_ns_list = spdk_realloc(ctx->new_ns_list, 1914 num_pages * sizeof(struct spdk_nvme_ns_list), 1915 ctx->ctrlr->page_size); 1916 if (!new_ns_list) { 1917 SPDK_ERRLOG("Failed to reallocate active_ns_list!\n"); 1918 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 1919 goto out; 1920 } 1921 1922 ctx->new_ns_list = new_ns_list; 1923 ctx->new_ns_list[ctrlr->cdata.nn] = 0; 1924 for (i = 0; i < ctrlr->cdata.nn; i++) { 1925 ctx->new_ns_list[i] = i + 1; 1926 } 1927 1928 ctx->state = NVME_ACTIVE_NS_STATE_DONE; 1929 goto out; 1930 } 1931 1932 ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING; 1933 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0, 1934 &ctx->new_ns_list[1024 * ctx->page], sizeof(struct spdk_nvme_ns_list), 1935 nvme_ctrlr_identify_active_ns_async_done, ctx); 1936 if (rc != 0) { 1937 ctx->state = NVME_ACTIVE_NS_STATE_ERROR; 1938 goto out; 1939 } 1940 1941 return; 1942 1943 out: 1944 if (ctx->deleter) { 1945 ctx->deleter(ctx); 1946 } 1947 } 1948 1949 static void 1950 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx) 1951 { 1952 struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr; 1953 1954 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 1955 nvme_ctrlr_destruct_namespaces(ctrlr); 1956 nvme_active_ns_ctx_destroy(ctx); 1957 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1958 return; 1959 } 1960 1961 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 1962 nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list); 1963 nvme_active_ns_ctx_destroy(ctx); 1964 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS, ctrlr->opts.admin_timeout_ms); 1965 } 1966 1967 static void 1968 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 1969 { 1970 struct nvme_active_ns_ctx *ctx; 1971 1972 ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter); 1973 if (!ctx) { 1974 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 1975 return; 1976 } 1977 1978 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS, 1979 ctrlr->opts.admin_timeout_ms); 1980 nvme_ctrlr_identify_active_ns_async(ctx); 1981 } 1982 1983 int 1984 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) 1985 { 1986 struct nvme_active_ns_ctx *ctx; 1987 int rc; 1988 1989 ctx = nvme_active_ns_ctx_create(ctrlr, NULL); 1990 if (!ctx) { 1991 return -ENOMEM; 1992 } 1993 1994 nvme_ctrlr_identify_active_ns_async(ctx); 1995 while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) { 1996 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1997 if (rc < 0) { 1998 ctx->state =
NVME_ACTIVE_NS_STATE_ERROR; 1999 break; 2000 } 2001 } 2002 2003 if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) { 2004 nvme_active_ns_ctx_destroy(ctx); 2005 return -ENXIO; 2006 } 2007 2008 assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE); 2009 nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list); 2010 nvme_active_ns_ctx_destroy(ctx); 2011 2012 return 0; 2013 } 2014 2015 static void 2016 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2017 { 2018 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2019 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2020 uint32_t nsid; 2021 int rc; 2022 2023 if (spdk_nvme_cpl_is_error(cpl)) { 2024 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2025 return; 2026 } 2027 2028 nvme_ns_set_identify_data(ns); 2029 2030 /* move on to the next active NS */ 2031 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2032 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2033 if (ns == NULL) { 2034 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2035 ctrlr->opts.admin_timeout_ms); 2036 return; 2037 } 2038 ns->ctrlr = ctrlr; 2039 ns->id = nsid; 2040 2041 rc = nvme_ctrlr_identify_ns_async(ns); 2042 if (rc) { 2043 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2044 } 2045 } 2046 2047 static int 2048 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) 2049 { 2050 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2051 struct spdk_nvme_ns_data *nsdata; 2052 2053 nsdata = &ns->nsdata; 2054 2055 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, 2056 ctrlr->opts.admin_timeout_ms); 2057 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0, 2058 nsdata, sizeof(*nsdata), 2059 nvme_ctrlr_identify_ns_async_done, ns); 2060 } 2061 2062 static int 2063 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2064 { 2065 uint32_t nsid; 2066 struct spdk_nvme_ns *ns; 2067 int rc; 2068 2069 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2070 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2071 if (ns == NULL) { 2072 /* No active NS, move on to the next state */ 2073 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, 2074 ctrlr->opts.admin_timeout_ms); 2075 return 0; 2076 } 2077 2078 ns->ctrlr = ctrlr; 2079 ns->id = nsid; 2080 2081 rc = nvme_ctrlr_identify_ns_async(ns); 2082 if (rc) { 2083 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2084 } 2085 2086 return rc; 2087 } 2088 2089 static int 2090 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) 2091 { 2092 uint32_t nsid; 2093 struct spdk_nvme_ns *ns; 2094 int rc; 2095 2096 if (!prev_nsid) { 2097 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2098 } else { 2099 /* move on to the next active NS */ 2100 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid); 2101 } 2102 2103 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2104 if (ns == NULL) { 2105 /* No first/next active NS, move on to the next state */ 2106 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 2107 ctrlr->opts.admin_timeout_ms); 2108 return 0; 2109 } 2110 2111 /* loop until we find a ns which has (supported) iocs specific data */ 2112 while (!nvme_ns_has_supported_iocs_specific_data(ns)) { 2113 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2114 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2115 if (ns == NULL) { 2116 /* no namespace with (supported) iocs specific data found */ 2117 nvme_ctrlr_set_state(ctrlr, 
NVME_CTRLR_STATE_CONFIGURE_AER, 2118 ctrlr->opts.admin_timeout_ms); 2119 return 0; 2120 } 2121 } 2122 2123 rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns); 2124 if (rc) { 2125 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2126 } 2127 2128 return rc; 2129 } 2130 2131 static void 2132 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2133 { 2134 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2135 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2136 2137 if (spdk_nvme_cpl_is_error(cpl)) { 2138 nvme_ns_free_zns_specific_data(ns); 2139 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2140 return; 2141 } 2142 2143 nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id); 2144 } 2145 2146 static int 2147 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns) 2148 { 2149 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2150 int rc; 2151 2152 switch (ns->csi) { 2153 case SPDK_NVME_CSI_ZNS: 2154 break; 2155 default: 2156 /* 2157 * This switch must handle all cases for which 2158 * nvme_ns_has_supported_iocs_specific_data() returns true, 2159 * other cases should never happen. 2160 */ 2161 assert(0); 2162 } 2163 2164 assert(!ns->nsdata_zns); 2165 ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, 2166 SPDK_MALLOC_SHARE); 2167 if (!ns->nsdata_zns) { 2168 return -ENOMEM; 2169 } 2170 2171 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC, 2172 ctrlr->opts.admin_timeout_ms); 2173 rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi, 2174 ns->nsdata_zns, sizeof(*ns->nsdata_zns), 2175 nvme_ctrlr_identify_ns_zns_specific_async_done, ns); 2176 if (rc) { 2177 nvme_ns_free_zns_specific_data(ns); 2178 } 2179 2180 return rc; 2181 } 2182 2183 static int 2184 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr) 2185 { 2186 if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) { 2187 /* Multi IOCS not supported/enabled, move on to the next state */ 2188 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, 2189 ctrlr->opts.admin_timeout_ms); 2190 return 0; 2191 } 2192 2193 return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0); 2194 } 2195 2196 static void 2197 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) 2198 { 2199 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; 2200 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2201 uint32_t nsid; 2202 int rc; 2203 2204 if (spdk_nvme_cpl_is_error(cpl)) { 2205 /* 2206 * Many controllers claim to be compatible with NVMe 1.3, however, 2207 * they do not implement NS ID Desc List. Therefore, instead of setting 2208 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion 2209 * error and move on to the next state. 2210 * 2211 * The proper way is to create a new quirk for controllers that violate 2212 * the NVMe 1.3 spec by not supporting NS ID Desc List. 2213 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since 2214 * it is too generic and was added in order to handle controllers that 2215 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST). 
2216 */ 2217 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2218 ctrlr->opts.admin_timeout_ms); 2219 return; 2220 } 2221 2222 nvme_ns_set_id_desc_list_data(ns); 2223 2224 /* move on to the next active NS */ 2225 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); 2226 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2227 if (ns == NULL) { 2228 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2229 ctrlr->opts.admin_timeout_ms); 2230 return; 2231 } 2232 2233 rc = nvme_ctrlr_identify_id_desc_async(ns); 2234 if (rc) { 2235 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2236 } 2237 } 2238 2239 static int 2240 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) 2241 { 2242 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; 2243 2244 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); 2245 2246 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, 2247 ctrlr->opts.admin_timeout_ms); 2248 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 2249 0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list), 2250 nvme_ctrlr_identify_id_desc_async_done, ns); 2251 } 2252 2253 static int 2254 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2255 { 2256 uint32_t nsid; 2257 struct spdk_nvme_ns *ns; 2258 int rc; 2259 2260 if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) && 2261 !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) || 2262 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 2263 NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); 2264 /* NS ID Desc List not supported, move on to the next state */ 2265 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2266 ctrlr->opts.admin_timeout_ms); 2267 return 0; 2268 } 2269 2270 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2271 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2272 if (ns == NULL) { 2273 /* No active NS, move on to the next state */ 2274 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC, 2275 ctrlr->opts.admin_timeout_ms); 2276 return 0; 2277 } 2278 2279 rc = nvme_ctrlr_identify_id_desc_async(ns); 2280 if (rc) { 2281 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2282 } 2283 2284 return rc; 2285 } 2286 2287 static void 2288 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr) 2289 { 2290 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA || 2291 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || 2292 ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) { 2293 if (ctrlr->cdata.nvmf_specific.ioccsz < 4) { 2294 NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n", 2295 ctrlr->cdata.nvmf_specific.ioccsz); 2296 ctrlr->cdata.nvmf_specific.ioccsz = 4; 2297 assert(0); 2298 } 2299 ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd); 2300 ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff; 2301 } 2302 } 2303 2304 static void 2305 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) 2306 { 2307 uint32_t cq_allocated, sq_allocated, min_allocated, i; 2308 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2309 2310 if (spdk_nvme_cpl_is_error(cpl)) { 2311 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n"); 2312 ctrlr->opts.num_io_queues = 0; 2313 } else { 2314 /* 2315 * Data in cdw0 is 0-based. 2316 * Lower 16-bits indicate number of submission queues allocated. 
2317 * Upper 16-bits indicate number of completion queues allocated. 2318 */ 2319 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; 2320 cq_allocated = (cpl->cdw0 >> 16) + 1; 2321 2322 /* 2323 * For 1:1 queue mapping, set number of allocated queues to be minimum of 2324 * submission and completion queues. 2325 */ 2326 min_allocated = spdk_min(sq_allocated, cq_allocated); 2327 2328 /* Set number of queues to be minimum of requested and actually allocated. */ 2329 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); 2330 } 2331 2332 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); 2333 if (ctrlr->free_io_qids == NULL) { 2334 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2335 return; 2336 } 2337 2338 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */ 2339 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { 2340 spdk_nvme_ctrlr_free_qid(ctrlr, i); 2341 } 2342 2343 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, 2344 ctrlr->opts.admin_timeout_ms); 2345 } 2346 2347 static int 2348 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) 2349 { 2350 int rc; 2351 2352 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { 2353 NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n", 2354 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); 2355 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; 2356 } else if (ctrlr->opts.num_io_queues < 1) { 2357 NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n"); 2358 ctrlr->opts.num_io_queues = 1; 2359 } 2360 2361 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, 2362 ctrlr->opts.admin_timeout_ms); 2363 2364 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, 2365 nvme_ctrlr_set_num_queues_done, ctrlr); 2366 if (rc != 0) { 2367 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2368 return rc; 2369 } 2370 2371 return 0; 2372 } 2373 2374 static void 2375 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) 2376 { 2377 uint32_t keep_alive_interval_us; 2378 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2379 2380 if (spdk_nvme_cpl_is_error(cpl)) { 2381 if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) && 2382 (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) { 2383 NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n"); 2384 } else { 2385 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n", 2386 cpl->status.sc, cpl->status.sct); 2387 ctrlr->opts.keep_alive_timeout_ms = 0; 2388 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2389 return; 2390 } 2391 } else { 2392 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { 2393 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n", 2394 cpl->cdw0); 2395 } 2396 2397 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; 2398 } 2399 2400 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2401 ctrlr->keep_alive_interval_ticks = 0; 2402 } else { 2403 keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2; 2404 2405 NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us); 2406 2407 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) / 2408 UINT64_C(1000000); 2409 2410 /* Schedule the first Keep Alive to be sent as soon as possible. 
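 * As a purely illustrative example: with keep_alive_timeout_ms == 10000,
 * keep_alive_interval_us == 10000 * 1000 / 2 == 5000000, i.e. a keep alive is
 * submitted roughly every 5 seconds while admin completions are being polled.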
*/ 2411 ctrlr->next_keep_alive_tick = spdk_get_ticks(); 2412 } 2413 2414 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 2415 ctrlr->opts.admin_timeout_ms); 2416 } 2417 2418 static int 2419 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) 2420 { 2421 int rc; 2422 2423 if (ctrlr->opts.keep_alive_timeout_ms == 0) { 2424 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 2425 ctrlr->opts.admin_timeout_ms); 2426 return 0; 2427 } 2428 2429 if (ctrlr->cdata.kas == 0) { 2430 NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n"); 2431 ctrlr->opts.keep_alive_timeout_ms = 0; 2432 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, 2433 ctrlr->opts.admin_timeout_ms); 2434 return 0; 2435 } 2436 2437 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, 2438 ctrlr->opts.admin_timeout_ms); 2439 2440 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ 2441 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, 2442 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); 2443 if (rc != 0) { 2444 NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc); 2445 ctrlr->opts.keep_alive_timeout_ms = 0; 2446 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2447 return rc; 2448 } 2449 2450 return 0; 2451 } 2452 2453 static void 2454 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) 2455 { 2456 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2457 2458 if (spdk_nvme_cpl_is_error(cpl)) { 2459 /* 2460 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature 2461 * is optional. 2462 */ 2463 NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", 2464 cpl->status.sc, cpl->status.sct); 2465 } else { 2466 NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n"); 2467 } 2468 2469 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2470 } 2471 2472 static int 2473 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) 2474 { 2475 uint8_t *host_id; 2476 uint32_t host_id_size; 2477 int rc; 2478 2479 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 2480 /* 2481 * NVMe-oF sends the host ID during Connect and doesn't allow 2482 * Set Features - Host Identifier after Connect, so we don't need to do anything here. 2483 */ 2484 NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n"); 2485 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2486 return 0; 2487 } 2488 2489 if (ctrlr->cdata.ctratt.host_id_exhid_supported) { 2490 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n"); 2491 host_id = ctrlr->opts.extended_host_id; 2492 host_id_size = sizeof(ctrlr->opts.extended_host_id); 2493 } else { 2494 NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n"); 2495 host_id = ctrlr->opts.host_id; 2496 host_id_size = sizeof(ctrlr->opts.host_id); 2497 } 2498 2499 /* If the user specified an all-zeroes host identifier, don't send the command. 
*/ 2500 if (spdk_mem_all_zero(host_id, host_id_size)) { 2501 NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n"); 2502 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 2503 return 0; 2504 } 2505 2506 SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size); 2507 2508 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, 2509 ctrlr->opts.admin_timeout_ms); 2510 2511 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); 2512 if (rc != 0) { 2513 NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc); 2514 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2515 return rc; 2516 } 2517 2518 return 0; 2519 } 2520 2521 static void 2522 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2523 { 2524 if (ctrlr->ns) { 2525 uint32_t i, num_ns = ctrlr->num_ns; 2526 2527 for (i = 0; i < num_ns; i++) { 2528 nvme_ns_destruct(&ctrlr->ns[i]); 2529 } 2530 2531 spdk_free(ctrlr->ns); 2532 ctrlr->ns = NULL; 2533 ctrlr->num_ns = 0; 2534 } 2535 } 2536 2537 void 2538 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2539 { 2540 uint32_t i, nn = ctrlr->cdata.nn; 2541 struct spdk_nvme_ns_data *nsdata; 2542 bool ns_is_active; 2543 2544 for (i = 0; i < nn; i++) { 2545 uint32_t nsid = i + 1; 2546 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 2547 2548 assert(ns != NULL); 2549 nsdata = &ns->nsdata; 2550 ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid); 2551 2552 if (nsdata->ncap && ns_is_active) { 2553 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was updated\n", nsid); 2554 if (nvme_ns_update(ns) != 0) { 2555 NVME_CTRLR_ERRLOG(ctrlr, "Failed to update active NS %u\n", nsid); 2556 continue; 2557 } 2558 } 2559 2560 if ((nsdata->ncap == 0) && ns_is_active) { 2561 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid); 2562 if (nvme_ns_construct(ns, nsid, ctrlr) != 0) { 2563 continue; 2564 } 2565 } 2566 2567 if (nsdata->ncap && !ns_is_active) { 2568 NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", nsid); 2569 nvme_ns_destruct(ns); 2570 } 2571 } 2572 } 2573 2574 static int 2575 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr) 2576 { 2577 int rc = 0; 2578 uint32_t i, nn = ctrlr->cdata.nn; 2579 2580 /* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset), 2581 * so check if we need to reallocate. 2582 */ 2583 if (nn != ctrlr->num_ns) { 2584 nvme_ctrlr_destruct_namespaces(ctrlr); 2585 2586 if (nn == 0) { 2587 NVME_CTRLR_WARNLOG(ctrlr, "controller has 0 namespaces\n"); 2588 return 0; 2589 } 2590 2591 ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, NULL, 2592 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 2593 if (ctrlr->ns == NULL) { 2594 rc = -ENOMEM; 2595 goto fail; 2596 } 2597 2598 ctrlr->num_ns = nn; 2599 } else { 2600 /* 2601 * The controller could have been reset with the same number of namespaces. 2602 * If so, we still need to free the iocs specific data, to get a clean slate. 
2603 */ 2604 for (i = 0; i < ctrlr->num_ns; i++) { 2605 nvme_ns_free_iocs_specific_data(&ctrlr->ns[i]); 2606 } 2607 } 2608 2609 return 0; 2610 2611 fail: 2612 nvme_ctrlr_destruct_namespaces(ctrlr); 2613 NVME_CTRLR_ERRLOG(ctrlr, "Failed to construct namespaces, err %d\n", rc); 2614 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2615 return rc; 2616 } 2617 2618 static int 2619 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr) 2620 { 2621 struct nvme_completion_poll_status *status; 2622 int rc = -ENOMEM; 2623 char *buffer = NULL; 2624 uint32_t nsid; 2625 size_t buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t)); 2626 2627 buffer = spdk_dma_zmalloc(buf_size, 4096, NULL); 2628 if (!buffer) { 2629 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting " 2630 "changed ns log.\n"); 2631 return rc; 2632 } 2633 2634 status = calloc(1, sizeof(*status)); 2635 if (!status) { 2636 NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n"); 2637 goto free_buffer; 2638 } 2639 2640 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 2641 SPDK_NVME_LOG_CHANGED_NS_LIST, 2642 SPDK_NVME_GLOBAL_NS_TAG, 2643 buffer, buf_size, 0, 2644 nvme_completion_poll_cb, status); 2645 2646 if (rc) { 2647 NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc); 2648 free(status); 2649 goto free_buffer; 2650 } 2651 2652 rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status, 2653 ctrlr->opts.admin_timeout_ms * 1000); 2654 if (!status->timed_out) { 2655 free(status); 2656 } 2657 2658 if (rc) { 2659 NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc); 2660 goto free_buffer; 2661 } 2662 2663 /* Only check for the overflow case. */ 2664 nsid = from_le32(buffer); 2665 if (nsid == 0xffffffffu) { 2666 NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n"); 2667 } 2668 2669 free_buffer: 2670 spdk_dma_free(buffer); 2671 return rc; 2672 } 2673 2674 void 2675 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr, 2676 const struct spdk_nvme_cpl *cpl) 2677 { 2678 union spdk_nvme_async_event_completion event; 2679 struct spdk_nvme_ctrlr_process *active_proc; 2680 bool ns_changed = false; 2681 int rc; 2682 2683 event.raw = cpl->cdw0; 2684 2685 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 2686 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 2687 /* 2688 * Applications (e.g., test/nvme/aer/aer.c) may also read the changed ns log (through 2689 * active_proc->aer_cb_fn). To avoid interfering with them, perform our own handling 2690 * only after active_proc->aer_cb_fn has been called. 2691 */ 2692 ns_changed = true; 2693 } 2694 2695 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 2696 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 2697 rc = nvme_ctrlr_update_ana_log_page(ctrlr); 2698 if (rc) { 2699 return; 2700 } 2701 nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states, ctrlr); 2702 } 2703 2704 active_proc = nvme_ctrlr_get_current_process(ctrlr); 2705 if (active_proc && active_proc->aer_cb_fn) { 2706 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); 2707 } 2708 2709 if (ns_changed) { 2710 /* 2711 * The changed ns log must be cleared by reading it; otherwise, 2712 * the target won't send us the subsequent 2713 * namespace enabling/disabling events.
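 * (Per the NVMe spec, reading the Changed Namespace List log page with Retain
 * Asynchronous Event cleared, as the Get Log Page issued below does, is what clears
 * the log and re-arms the Namespace Attribute Changed notice.)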
2714 */ 2715 nvme_ctrlr_clear_changed_ns_log(ctrlr); 2716 2717 rc = nvme_ctrlr_identify_active_ns(ctrlr); 2718 if (rc) { 2719 return; 2720 } 2721 nvme_ctrlr_update_namespaces(ctrlr); 2722 nvme_io_msg_ctrlr_update(ctrlr); 2723 } 2724 } 2725 2726 static void 2727 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr, 2728 const struct spdk_nvme_cpl *cpl) 2729 { 2730 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event; 2731 2732 nvme_event = calloc(1, sizeof(*nvme_event)); 2733 if (!nvme_event) { 2734 NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n"); 2735 return; 2736 } 2737 2738 nvme_event->cpl = *cpl; 2739 STAILQ_INSERT_TAIL(&ctrlr->async_events, nvme_event, link); 2740 } 2741 2742 void 2743 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr) 2744 { 2745 struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp; 2746 2747 STAILQ_FOREACH_SAFE(nvme_event, &ctrlr->async_events, link, nvme_event_tmp) { 2748 STAILQ_REMOVE(&ctrlr->async_events, nvme_event, 2749 spdk_nvme_ctrlr_aer_completion_list, link); 2750 nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl); 2751 free(nvme_event); 2752 } 2753 } 2754 2755 static void 2756 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) 2757 { 2758 struct nvme_async_event_request *aer = arg; 2759 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; 2760 2761 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && 2762 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { 2763 /* 2764 * This is simulated when controller is being shut down, to 2765 * effectively abort outstanding asynchronous event requests 2766 * and make sure all memory is freed. Do not repost the 2767 * request in this case. 2768 */ 2769 return; 2770 } 2771 2772 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && 2773 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { 2774 /* 2775 * SPDK will only send as many AERs as the device says it supports, 2776 * so this status code indicates an out-of-spec device. Do not repost 2777 * the request in this case. 2778 */ 2779 NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n" 2780 "handling. Do not repost this AER.\n"); 2781 return; 2782 } 2783 2784 /* Add the events to the list */ 2785 nvme_ctrlr_queue_async_event(ctrlr, cpl); 2786 2787 /* If the ctrlr was removed or in the destruct state, we should not send aer again */ 2788 if (ctrlr->is_removed || ctrlr->is_destructed) { 2789 return; 2790 } 2791 2792 /* 2793 * Repost another asynchronous event request to replace the one 2794 * that just completed. 2795 */ 2796 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { 2797 /* 2798 * We can't do anything to recover from a failure here, 2799 * so just print a warning message and leave the AER unsubmitted. 
2800 */ 2801 NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n"); 2802 } 2803 } 2804 2805 static int 2806 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, 2807 struct nvme_async_event_request *aer) 2808 { 2809 struct nvme_request *req; 2810 2811 aer->ctrlr = ctrlr; 2812 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); 2813 aer->req = req; 2814 if (req == NULL) { 2815 return -1; 2816 } 2817 2818 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; 2819 return nvme_ctrlr_submit_admin_request(ctrlr, req); 2820 } 2821 2822 static void 2823 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) 2824 { 2825 struct nvme_async_event_request *aer; 2826 int rc; 2827 uint32_t i; 2828 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; 2829 2830 if (spdk_nvme_cpl_is_error(cpl)) { 2831 NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n"); 2832 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2833 ctrlr->opts.admin_timeout_ms); 2834 return; 2835 } 2836 2837 /* aerl is a zero-based value, so we need to add 1 here. */ 2838 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); 2839 2840 for (i = 0; i < ctrlr->num_aers; i++) { 2841 aer = &ctrlr->aer[i]; 2842 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); 2843 if (rc) { 2844 NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n"); 2845 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2846 return; 2847 } 2848 } 2849 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, 2850 ctrlr->opts.admin_timeout_ms); 2851 } 2852 2853 static int 2854 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) 2855 { 2856 union spdk_nvme_feat_async_event_configuration config; 2857 int rc; 2858 2859 config.raw = 0; 2860 config.bits.crit_warn.bits.available_spare = 1; 2861 config.bits.crit_warn.bits.temperature = 1; 2862 config.bits.crit_warn.bits.device_reliability = 1; 2863 config.bits.crit_warn.bits.read_only = 1; 2864 config.bits.crit_warn.bits.volatile_memory_backup = 1; 2865 2866 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { 2867 if (ctrlr->cdata.oaes.ns_attribute_notices) { 2868 config.bits.ns_attr_notice = 1; 2869 } 2870 if (ctrlr->cdata.oaes.fw_activation_notices) { 2871 config.bits.fw_activation_notice = 1; 2872 } 2873 if (ctrlr->cdata.oaes.ana_change_notices) { 2874 config.bits.ana_change_notice = 1; 2875 } 2876 } 2877 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { 2878 config.bits.telemetry_log_notice = 1; 2879 } 2880 2881 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, 2882 ctrlr->opts.admin_timeout_ms); 2883 2884 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, 2885 nvme_ctrlr_configure_aer_done, 2886 ctrlr); 2887 if (rc != 0) { 2888 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 2889 return rc; 2890 } 2891 2892 return 0; 2893 } 2894 2895 struct spdk_nvme_ctrlr_process * 2896 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) 2897 { 2898 struct spdk_nvme_ctrlr_process *active_proc; 2899 2900 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 2901 if (active_proc->pid == pid) { 2902 return active_proc; 2903 } 2904 } 2905 2906 return NULL; 2907 } 2908 2909 struct spdk_nvme_ctrlr_process * 2910 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) 2911 { 2912 return nvme_ctrlr_get_process(ctrlr, getpid()); 2913 } 2914 2915 /** 2916 * This 
function will be called when a process is using the controller. 2917 * 1. For the primary process, it is called when constructing the controller. 2918 * 2. For the secondary process, it is called at probing the controller. 2919 * Note: will check whether the process is already added for the same process. 2920 */ 2921 int 2922 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) 2923 { 2924 struct spdk_nvme_ctrlr_process *ctrlr_proc; 2925 pid_t pid = getpid(); 2926 2927 /* Check whether the process is already added or not */ 2928 if (nvme_ctrlr_get_process(ctrlr, pid)) { 2929 return 0; 2930 } 2931 2932 /* Initialize the per process properties for this ctrlr */ 2933 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 2934 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 2935 if (ctrlr_proc == NULL) { 2936 NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n"); 2937 2938 return -1; 2939 } 2940 2941 ctrlr_proc->is_primary = spdk_process_is_primary(); 2942 ctrlr_proc->pid = pid; 2943 STAILQ_INIT(&ctrlr_proc->active_reqs); 2944 ctrlr_proc->devhandle = devhandle; 2945 ctrlr_proc->ref = 0; 2946 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); 2947 2948 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); 2949 2950 return 0; 2951 } 2952 2953 /** 2954 * This function will be called when the process detaches the controller. 2955 * Note: the ctrlr_lock must be held when calling this function. 2956 */ 2957 static void 2958 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, 2959 struct spdk_nvme_ctrlr_process *proc) 2960 { 2961 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2962 2963 assert(STAILQ_EMPTY(&proc->active_reqs)); 2964 2965 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 2966 spdk_nvme_ctrlr_free_io_qpair(qpair); 2967 } 2968 2969 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); 2970 2971 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 2972 spdk_pci_device_detach(proc->devhandle); 2973 } 2974 2975 spdk_free(proc); 2976 } 2977 2978 /** 2979 * This function will be called when the process exited unexpectedly 2980 * in order to free any incomplete nvme request, allocated IO qpairs 2981 * and allocated memory. 2982 * Note: the ctrlr_lock must be held when calling this function. 2983 */ 2984 static void 2985 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) 2986 { 2987 struct nvme_request *req, *tmp_req; 2988 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2989 2990 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 2991 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 2992 2993 assert(req->pid == proc->pid); 2994 2995 nvme_free_request(req); 2996 } 2997 2998 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { 2999 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); 3000 3001 /* 3002 * The process may have been killed while some qpairs were in their 3003 * completion context. Clear that flag here to allow these IO 3004 * qpairs to be deleted. 3005 */ 3006 qpair->in_completion_context = 0; 3007 3008 qpair->no_deletion_notification_needed = 1; 3009 3010 spdk_nvme_ctrlr_free_io_qpair(qpair); 3011 } 3012 3013 spdk_free(proc); 3014 } 3015 3016 /** 3017 * This function will be called when destructing the controller. 3018 * 1. There is no more admin request on this controller. 3019 * 2. Clean up any left resource allocation when its associated process is gone. 
3020 */ 3021 void 3022 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) 3023 { 3024 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3025 3026 /* Free all the processes' properties and make sure no pending admin IOs */ 3027 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3028 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3029 3030 assert(STAILQ_EMPTY(&active_proc->active_reqs)); 3031 3032 spdk_free(active_proc); 3033 } 3034 } 3035 3036 /** 3037 * This function will be called when any other process attaches or 3038 * detaches the controller in order to cleanup those unexpectedly 3039 * terminated processes. 3040 * Note: the ctrlr_lock must be held when calling this function. 3041 */ 3042 static int 3043 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) 3044 { 3045 struct spdk_nvme_ctrlr_process *active_proc, *tmp; 3046 int active_proc_count = 0; 3047 3048 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { 3049 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { 3050 NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpected\n", active_proc->pid); 3051 3052 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); 3053 3054 nvme_ctrlr_cleanup_process(active_proc); 3055 } else { 3056 active_proc_count++; 3057 } 3058 } 3059 3060 return active_proc_count; 3061 } 3062 3063 void 3064 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) 3065 { 3066 struct spdk_nvme_ctrlr_process *active_proc; 3067 3068 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3069 3070 nvme_ctrlr_remove_inactive_proc(ctrlr); 3071 3072 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3073 if (active_proc) { 3074 active_proc->ref++; 3075 } 3076 3077 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3078 } 3079 3080 void 3081 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) 3082 { 3083 struct spdk_nvme_ctrlr_process *active_proc; 3084 int proc_count; 3085 3086 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3087 3088 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); 3089 3090 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3091 if (active_proc) { 3092 active_proc->ref--; 3093 assert(active_proc->ref >= 0); 3094 3095 /* 3096 * The last active process will be removed at the end of 3097 * the destruction of the controller. 3098 */ 3099 if (active_proc->ref == 0 && proc_count != 1) { 3100 nvme_ctrlr_remove_process(ctrlr, active_proc); 3101 } 3102 } 3103 3104 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3105 } 3106 3107 int 3108 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) 3109 { 3110 struct spdk_nvme_ctrlr_process *active_proc; 3111 int ref = 0; 3112 3113 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3114 3115 nvme_ctrlr_remove_inactive_proc(ctrlr); 3116 3117 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { 3118 ref += active_proc->ref; 3119 } 3120 3121 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3122 3123 return ref; 3124 } 3125 3126 /** 3127 * Get the PCI device handle which is only visible to its associated process. 
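 * For reference, the public spdk_nvme_ctrlr_get_pci_device() defined later in this file
 * is a thin wrapper around this helper for PCIe-attached controllers. Illustrative use:
 *
 *	struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);
 *	if (dev != NULL) {
 *		// controller is PCIe-attached and the handle is valid in this process
 *	}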
3128 */ 3129 struct spdk_pci_device * 3130 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) 3131 { 3132 struct spdk_nvme_ctrlr_process *active_proc; 3133 struct spdk_pci_device *devhandle = NULL; 3134 3135 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3136 3137 active_proc = nvme_ctrlr_get_current_process(ctrlr); 3138 if (active_proc) { 3139 devhandle = active_proc->devhandle; 3140 } 3141 3142 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3143 3144 return devhandle; 3145 } 3146 3147 /** 3148 * This function will be called repeatedly during initialization until the controller is ready. 3149 */ 3150 int 3151 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) 3152 { 3153 union spdk_nvme_cc_register cc; 3154 union spdk_nvme_csts_register csts; 3155 uint32_t ready_timeout_in_ms; 3156 uint64_t ticks; 3157 int rc = 0; 3158 3159 ticks = spdk_get_ticks(); 3160 3161 /* 3162 * May need to avoid accessing any register on the target controller 3163 * for a while. Return early without touching the FSM. 3164 * Check sleep_timeout_tsc > 0 for unit test. 3165 */ 3166 if ((ctrlr->sleep_timeout_tsc > 0) && 3167 (ticks <= ctrlr->sleep_timeout_tsc)) { 3168 return 0; 3169 } 3170 ctrlr->sleep_timeout_tsc = 0; 3171 3172 if (ctrlr->state > NVME_CTRLR_STATE_CONNECT_ADMINQ && 3173 (nvme_ctrlr_get_cc(ctrlr, &cc) || nvme_ctrlr_get_csts(ctrlr, &csts))) { 3174 if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { 3175 /* While a device is resetting, it may be unable to service MMIO reads 3176 * temporarily. Allow for this case. 3177 */ 3178 NVME_CTRLR_DEBUGLOG(ctrlr, "Get registers failed while waiting for CSTS.RDY == 0\n"); 3179 goto init_timeout; 3180 } 3181 NVME_CTRLR_ERRLOG(ctrlr, "Failed to read CC and CSTS in state %d\n", ctrlr->state); 3182 return -EIO; 3183 } 3184 3185 ready_timeout_in_ms = 500 * ctrlr->cap.bits.to; 3186 3187 /* 3188 * Check if the current initialization step is done or has timed out. 3189 */ 3190 switch (ctrlr->state) { 3191 case NVME_CTRLR_STATE_INIT_DELAY: 3192 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); 3193 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) { 3194 /* 3195 * Controller may need some delay before it's enabled. 3196 * 3197 * This is a workaround for an issue where the PCIe-attached NVMe controller 3198 * is not ready after VFIO reset. We delay the initialization rather than the 3199 * enabling itself, because this is required only for the very first enabling 3200 * - directly after a VFIO reset. 
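 * (The delay itself is realized by setting ctrlr->sleep_timeout_tsc below; the check at
 * the top of nvme_ctrlr_process_init() then returns early, without touching any controller
 * registers, until that deadline has passed.)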
3201 */ 3202 NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n"); 3203 ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); 3204 } 3205 break; 3206 3207 case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */ 3208 rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq); 3209 if (rc == 0) { 3210 nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED); 3211 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, NVME_TIMEOUT_INFINITE); 3212 } else { 3213 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); 3214 } 3215 break; 3216 3217 case NVME_CTRLR_STATE_READ_VS: 3218 nvme_ctrlr_get_vs(ctrlr, &ctrlr->vs); 3219 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE); 3220 break; 3221 3222 case NVME_CTRLR_STATE_READ_CAP: 3223 nvme_ctrlr_init_cap(ctrlr); 3224 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE); 3225 break; 3226 3227 case NVME_CTRLR_STATE_CHECK_EN: 3228 /* Begin the hardware initialization by making sure the controller is disabled. */ 3229 if (cc.bits.en) { 3230 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n"); 3231 /* 3232 * Controller is currently enabled. We need to disable it to cause a reset. 3233 * 3234 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. 3235 * Wait for the ready bit to be 1 before disabling the controller. 3236 */ 3237 if (csts.bits.rdy == 0) { 3238 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); 3239 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms); 3240 return 0; 3241 } 3242 3243 /* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */ 3244 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3245 cc.bits.en = 0; 3246 if (nvme_ctrlr_set_cc(ctrlr, &cc)) { 3247 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3248 return -EIO; 3249 } 3250 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); 3251 3252 /* 3253 * Wait 2.5 seconds before accessing PCI registers. 3254 * Not using sleep() to avoid blocking other controller's initialization. 3255 */ 3256 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { 3257 NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); 3258 ctrlr->sleep_timeout_tsc = ticks + (2500 * spdk_get_ticks_hz() / 1000); 3259 } 3260 return 0; 3261 } else { 3262 if (csts.bits.rdy == 1) { 3263 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n"); 3264 } 3265 3266 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); 3267 return 0; 3268 } 3269 break; 3270 3271 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: 3272 if (csts.bits.rdy == 1) { 3273 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n"); 3274 /* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. 
*/ 3275 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n"); 3276 cc.bits.en = 0; 3277 if (nvme_ctrlr_set_cc(ctrlr, &cc)) { 3278 NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n"); 3279 return -EIO; 3280 } 3281 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); 3282 return 0; 3283 } 3284 break; 3285 3286 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: 3287 if (csts.bits.rdy == 0) { 3288 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n"); 3289 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); 3290 /* 3291 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting 3292 * set to 1 if it is too soon after CSTS.RDY is reported as 0. 3293 */ 3294 spdk_delay_us(100); 3295 return 0; 3296 } 3297 break; 3298 3299 case NVME_CTRLR_STATE_ENABLE: 3300 NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n"); 3301 rc = nvme_ctrlr_enable(ctrlr); 3302 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms); 3303 return rc; 3304 3305 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: 3306 if (csts.bits.rdy == 1) { 3307 NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); 3308 /* 3309 * The controller has been enabled. 3310 * Perform the rest of initialization serially. 3311 */ 3312 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE, 3313 ctrlr->opts.admin_timeout_ms); 3314 return 0; 3315 } 3316 break; 3317 3318 case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE: 3319 nvme_transport_qpair_reset(ctrlr->adminq); 3320 if (spdk_nvme_ctrlr_is_discovery(ctrlr)) { 3321 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); 3322 } else { 3323 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, ctrlr->opts.admin_timeout_ms); 3324 } 3325 break; 3326 3327 case NVME_CTRLR_STATE_IDENTIFY: 3328 rc = nvme_ctrlr_identify(ctrlr); 3329 break; 3330 3331 case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC: 3332 rc = nvme_ctrlr_identify_iocs_specific(ctrlr); 3333 break; 3334 3335 case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG: 3336 rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr); 3337 break; 3338 3339 case NVME_CTRLR_STATE_SET_NUM_QUEUES: 3340 nvme_ctrlr_update_nvmf_ioccsz(ctrlr); 3341 rc = nvme_ctrlr_set_num_queues(ctrlr); 3342 break; 3343 3344 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: 3345 _nvme_ctrlr_identify_active_ns(ctrlr); 3346 break; 3347 3348 case NVME_CTRLR_STATE_CONSTRUCT_NS: 3349 rc = nvme_ctrlr_construct_namespaces(ctrlr); 3350 if (!rc) { 3351 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, 3352 ctrlr->opts.admin_timeout_ms); 3353 } 3354 break; 3355 3356 case NVME_CTRLR_STATE_IDENTIFY_NS: 3357 rc = nvme_ctrlr_identify_namespaces(ctrlr); 3358 break; 3359 3360 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: 3361 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); 3362 break; 3363 3364 case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC: 3365 rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr); 3366 break; 3367 3368 case NVME_CTRLR_STATE_CONFIGURE_AER: 3369 rc = nvme_ctrlr_configure_aer(ctrlr); 3370 break; 3371 3372 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: 3373 rc = nvme_ctrlr_set_supported_log_pages(ctrlr); 3374 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, 3375 ctrlr->opts.admin_timeout_ms); 3376 break; 3377 3378 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: 3379 nvme_ctrlr_set_supported_features(ctrlr); 3380 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, 3381 ctrlr->opts.admin_timeout_ms); 3382 break; 3383 3384 case 
NVME_CTRLR_STATE_SET_DB_BUF_CFG: 3385 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); 3386 break; 3387 3388 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: 3389 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); 3390 break; 3391 3392 case NVME_CTRLR_STATE_SET_HOST_ID: 3393 rc = nvme_ctrlr_set_host_id(ctrlr); 3394 break; 3395 3396 case NVME_CTRLR_STATE_READY: 3397 NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n"); 3398 return 0; 3399 3400 case NVME_CTRLR_STATE_ERROR: 3401 NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n"); 3402 return -1; 3403 3404 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: 3405 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC: 3406 case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG: 3407 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: 3408 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS: 3409 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: 3410 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: 3411 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC: 3412 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: 3413 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: 3414 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: 3415 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: 3416 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3417 break; 3418 3419 default: 3420 assert(0); 3421 return -1; 3422 } 3423 3424 init_timeout: 3425 /* Note: we use the ticks captured when we entered this function. 3426 * This covers environments where the SPDK process gets swapped out after 3427 * we tried to advance the state but before we check the timeout here. 3428 * It is not normal for this to happen, but harmless to handle it in this 3429 * way. 3430 */ 3431 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && 3432 ticks > ctrlr->state_timeout_tsc) { 3433 NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d\n", ctrlr->state); 3434 return -1; 3435 } 3436 3437 return rc; 3438 } 3439 3440 int 3441 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) 3442 { 3443 pthread_mutexattr_t attr; 3444 int rc = 0; 3445 3446 if (pthread_mutexattr_init(&attr)) { 3447 return -1; 3448 } 3449 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || 3450 #ifndef __FreeBSD__ 3451 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || 3452 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || 3453 #endif 3454 pthread_mutex_init(mtx, &attr)) { 3455 rc = -1; 3456 } 3457 pthread_mutexattr_destroy(&attr); 3458 return rc; 3459 } 3460 3461 int 3462 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) 3463 { 3464 int rc; 3465 3466 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 3467 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); 3468 } else { 3469 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); 3470 } 3471 3472 if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { 3473 NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", 3474 ctrlr->opts.admin_queue_size); 3475 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES; 3476 } 3477 3478 if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) { 3479 NVME_CTRLR_ERRLOG(ctrlr, 3480 "admin_queue_size %u is less than minimum defined by NVMe spec, use min value\n", 3481 ctrlr->opts.admin_queue_size); 3482 ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES; 3483 } 3484 3485 ctrlr->flags = 0; 3486 ctrlr->free_io_qids = NULL; 3487 ctrlr->is_resetting = false; 3488 
ctrlr->is_failed = false; 3489 ctrlr->is_destructed = false; 3490 3491 TAILQ_INIT(&ctrlr->active_io_qpairs); 3492 STAILQ_INIT(&ctrlr->queued_aborts); 3493 STAILQ_INIT(&ctrlr->async_events); 3494 ctrlr->outstanding_aborts = 0; 3495 3496 ctrlr->ana_log_page = NULL; 3497 ctrlr->ana_log_page_size = 0; 3498 3499 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); 3500 if (rc != 0) { 3501 return rc; 3502 } 3503 3504 TAILQ_INIT(&ctrlr->active_procs); 3505 3506 return rc; 3507 } 3508 3509 static void 3510 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr) 3511 { 3512 nvme_ctrlr_get_cap(ctrlr, &ctrlr->cap); 3513 3514 if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { 3515 ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED; 3516 } 3517 3518 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); 3519 3520 /* For now, always select page_size == min_page_size. */ 3521 ctrlr->page_size = ctrlr->min_page_size; 3522 3523 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); 3524 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); 3525 if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE && 3526 ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) { 3527 /* If the user specifically set an IO queue size different than the 3528 * default, use that value. Otherwise overwrite with the quirked value. 3529 * This allows this quirk to be overridden when necessary. 3530 * However, cap.mqes still needs to be respected. 3531 */ 3532 ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK; 3533 } 3534 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); 3535 3536 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); 3537 } 3538 3539 void 3540 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) 3541 { 3542 pthread_mutex_destroy(&ctrlr->ctrlr_lock); 3543 } 3544 3545 void 3546 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr, 3547 struct nvme_ctrlr_detach_ctx *ctx) 3548 { 3549 struct spdk_nvme_qpair *qpair, *tmp; 3550 3551 NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n"); 3552 3553 ctrlr->is_destructed = true; 3554 3555 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3556 3557 nvme_ctrlr_abort_queued_aborts(ctrlr); 3558 nvme_transport_admin_qpair_abort_aers(ctrlr->adminq); 3559 3560 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { 3561 spdk_nvme_ctrlr_free_io_qpair(qpair); 3562 } 3563 3564 nvme_ctrlr_free_doorbell_buffer(ctrlr); 3565 nvme_ctrlr_free_iocs_specific_data(ctrlr); 3566 3567 if (ctrlr->opts.no_shn_notification) { 3568 NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n"); 3569 nvme_ctrlr_disable(ctrlr); 3570 ctx->shutdown_complete = true; 3571 } else { 3572 nvme_ctrlr_shutdown_async(ctrlr, ctx); 3573 } 3574 } 3575 3576 int 3577 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr, 3578 struct nvme_ctrlr_detach_ctx *ctx) 3579 { 3580 int rc = 0; 3581 3582 if (!ctx->shutdown_complete) { 3583 rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx); 3584 if (rc == -EAGAIN) { 3585 return -EAGAIN; 3586 } 3587 /* Destruct ctrlr forcefully for any other error. 
*/ 3588 } 3589 3590 if (ctx->cb_fn) { 3591 ctx->cb_fn(ctrlr); 3592 } 3593 3594 nvme_ctrlr_destruct_namespaces(ctrlr); 3595 spdk_free(ctrlr->active_ns_list); 3596 ctrlr->active_ns_list = NULL; 3597 ctrlr->max_active_ns_idx = 0; 3598 3599 spdk_bit_array_free(&ctrlr->free_io_qids); 3600 3601 spdk_free(ctrlr->ana_log_page); 3602 ctrlr->ana_log_page = NULL; 3603 ctrlr->ana_log_page_size = 0; 3604 3605 nvme_transport_ctrlr_destruct(ctrlr); 3606 3607 return rc; 3608 } 3609 3610 void 3611 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 3612 { 3613 struct nvme_ctrlr_detach_ctx ctx = {}; 3614 int rc; 3615 3616 nvme_ctrlr_destruct_async(ctrlr, &ctx); 3617 3618 while (1) { 3619 rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx); 3620 if (rc != -EAGAIN) { 3621 break; 3622 } 3623 nvme_delay(1000); 3624 } 3625 } 3626 3627 int 3628 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, 3629 struct nvme_request *req) 3630 { 3631 return nvme_qpair_submit_request(ctrlr->adminq, req); 3632 } 3633 3634 static void 3635 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) 3636 { 3637 /* Do nothing */ 3638 } 3639 3640 /* 3641 * Check if we need to send a Keep Alive command. 3642 * Caller must hold ctrlr->ctrlr_lock. 3643 */ 3644 static int 3645 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) 3646 { 3647 uint64_t now; 3648 struct nvme_request *req; 3649 struct spdk_nvme_cmd *cmd; 3650 int rc = 0; 3651 3652 now = spdk_get_ticks(); 3653 if (now < ctrlr->next_keep_alive_tick) { 3654 return rc; 3655 } 3656 3657 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); 3658 if (req == NULL) { 3659 return rc; 3660 } 3661 3662 cmd = &req->cmd; 3663 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; 3664 3665 rc = nvme_ctrlr_submit_admin_request(ctrlr, req); 3666 if (rc != 0) { 3667 NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n"); 3668 rc = -ENXIO; 3669 } 3670 3671 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; 3672 return rc; 3673 } 3674 3675 int32_t 3676 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) 3677 { 3678 int32_t num_completions; 3679 int32_t rc; 3680 3681 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); 3682 3683 if (ctrlr->keep_alive_interval_ticks) { 3684 rc = nvme_ctrlr_keep_alive(ctrlr); 3685 if (rc) { 3686 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3687 return rc; 3688 } 3689 } 3690 3691 rc = nvme_io_msg_process(ctrlr); 3692 if (rc < 0) { 3693 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3694 return rc; 3695 } 3696 num_completions = rc; 3697 3698 rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 3699 3700 nvme_ctrlr_complete_queued_async_events(ctrlr); 3701 3702 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); 3703 3704 if (rc < 0) { 3705 num_completions = rc; 3706 } else { 3707 num_completions += rc; 3708 } 3709 3710 return num_completions; 3711 } 3712 3713 const struct spdk_nvme_ctrlr_data * 3714 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) 3715 { 3716 return &ctrlr->cdata; 3717 } 3718 3719 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) 3720 { 3721 union spdk_nvme_csts_register csts; 3722 3723 if (nvme_ctrlr_get_csts(ctrlr, &csts)) { 3724 csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE; 3725 } 3726 return csts; 3727 } 3728 3729 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) 3730 { 3731 return ctrlr->cap; 3732 } 3733 3734 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr 

const struct spdk_nvme_ctrlr_data *
spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->cdata;
}

union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_csts_register csts;

	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
		csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
	}
	return csts;
}

union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->cap;
}

union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->vs;
}

union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cmbsz_register cmbsz;

	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
		cmbsz.raw = 0;
	}

	return cmbsz;
}

union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_pmrcap_register pmrcap;

	if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) {
		pmrcap.raw = 0;
	}

	return pmrcap;
}

uint64_t
spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->pmr_size;
}

uint32_t
spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->num_ns;
}

static int32_t
nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	int32_t result = -1;

	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->cdata.nn) {
		return result;
	}

	int32_t lower = 0;
	int32_t upper = ctrlr->max_active_ns_idx;
	int32_t mid;

	while (lower <= upper) {
		mid = lower + (upper - lower) / 2;
		if (ctrlr->active_ns_list[mid] == nsid) {
			result = mid;
			break;
		} else {
			if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
				lower = mid + 1;
			} else {
				upper = mid - 1;
			}
		}
	}

	return result;
}

bool
spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	return nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
}

uint32_t
spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
}

uint32_t
spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
{
	int32_t nsid_idx = nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
	if (nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->max_active_ns_idx) {
		return ctrlr->active_ns_list[nsid_idx + 1];
	}
	return 0;
}
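
/*
 * Illustrative usage sketch (not part of the driver): the two helpers above
 * are typically used together to walk every active namespace on a controller.
 *
 *	uint32_t nsid;
 *	struct spdk_nvme_ns *ns;
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		// ... inspect or use the namespace ...
 *	}
 */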

struct spdk_nvme_ns *
spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	if (nsid < 1 || nsid > ctrlr->num_ns) {
		return NULL;
	}

	return &ctrlr->ns[nsid - 1];
}

struct spdk_pci_device *
spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr == NULL) {
		return NULL;
	}

	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
		return NULL;
	}

	return nvme_ctrlr_proc_get_devhandle(ctrlr);
}

uint32_t
spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->max_xfer_size;
}

void
spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
				      spdk_nvme_aer_cb aer_cb_fn,
				      void *aer_cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->aer_cb_fn = aer_cb_fn;
		active_proc->aer_cb_arg = aer_cb_arg;
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
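
/*
 * Illustrative usage sketch (not part of the driver), assuming the usual
 * spdk_nvme_aer_cb signature of (void *aer_cb_arg, const struct spdk_nvme_cpl *cpl):
 *
 *	static void
 *	my_aer_handler(void *arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		if (spdk_nvme_cpl_is_error(cpl)) {
 *			return;
 *		}
 *		// The async event type/info are reported in cpl->cdw0.
 *	}
 *
 *	spdk_nvme_ctrlr_register_aer_callback(ctrlr, my_aer_handler, NULL);
 */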

void
spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
		uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ctrlr_process *active_proc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	active_proc = nvme_ctrlr_get_current_process(ctrlr);
	if (active_proc) {
		active_proc->timeout_ticks = timeout_us * spdk_get_ticks_hz() / 1000000ULL;
		active_proc->timeout_cb_fn = cb_fn;
		active_proc->timeout_cb_arg = cb_arg;
	}

	ctrlr->timeout_enabled = true;

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

bool
spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
{
	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
	return ctrlr->log_page_supported[log_page];
}

bool
spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
{
	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
	return ctrlr->feature_supported[feature_code];
}

int
spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status *status;
	int res;
	struct spdk_nvme_ns *ns;

	if (nsid == 0) {
		return -EINVAL;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}
	free(status);

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
	assert(ns != NULL);
	return nvme_ns_construct(ns, nsid, ctrlr);
}

int
spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status *status;
	int res;
	struct spdk_nvme_ns *ns;

	if (nsid == 0) {
		return -EINVAL;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}
	free(status);

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
	assert(ns != NULL);
	/* Inactive NS */
	nvme_ns_destruct(ns);

	return 0;
}

uint32_t
spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
{
	struct nvme_completion_poll_status *status;
	int res;
	uint32_t nsid;
	struct spdk_nvme_ns *ns;

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return 0;
	}

	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return 0;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return 0;
	}

	nsid = status->cpl.cdw0;
	free(status);

	assert(nsid > 0);

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
	assert(ns != NULL);
	/* Inactive NS */
	res = nvme_ns_construct(ns, nsid, ctrlr);
	if (res) {
		return 0;
	}

	/* Return the namespace ID that was created */
	return nsid;
}
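
/*
 * Illustrative usage sketch (not part of the driver): creating a namespace
 * and attaching it to this controller.  The sizes below are hypothetical, and
 * the controller list is addressed by the CNTLID reported by
 * spdk_nvme_ctrlr_get_data().
 *
 *	struct spdk_nvme_ns_data ns_data = {};
 *	struct spdk_nvme_ctrlr_list ctrlr_list = {};
 *	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
 *	uint32_t nsid;
 *
 *	ns_data.nsze = 1024 * 1024;	// size in logical blocks (hypothetical)
 *	ns_data.ncap = ns_data.nsze;
 *	nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);
 *	if (nsid != 0) {
 *		ctrlr_list.ctrlr_count = 1;
 *		ctrlr_list.ctrlr_list[0] = cdata->cntlid;
 *		spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list);
 *	}
 */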

int
spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct nvme_completion_poll_status *status;
	int res;
	struct spdk_nvme_ns *ns;

	if (nsid == 0) {
		return -EINVAL;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}
	free(status);

	res = nvme_ctrlr_identify_active_ns(ctrlr);
	if (res) {
		return res;
	}

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
	assert(ns != NULL);
	nvme_ns_destruct(ns);

	return 0;
}

int
spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		       struct spdk_nvme_format *format)
{
	struct nvme_completion_poll_status *status;
	int res;

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
				    status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}
	free(status);

	return spdk_nvme_ctrlr_reset(ctrlr);
}

int
spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
				int slot, enum spdk_nvme_fw_commit_action commit_action,
				struct spdk_nvme_status *completion_status)
{
	struct spdk_nvme_fw_commit fw_commit;
	struct nvme_completion_poll_status *status;
	int res;
	unsigned int size_remaining;
	unsigned int offset;
	unsigned int transfer;
	void *p;

	if (!completion_status) {
		return -EINVAL;
	}
	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
	if (size % 4) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n");
		return -1;
	}

	/* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG
	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG
	 */
	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n");
		return -1;
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	/* Firmware download */
	size_remaining = size;
	offset = 0;
	p = payload;

	while (size_remaining > 0) {
		transfer = spdk_min(size_remaining, ctrlr->min_page_size);

		memset(status, 0, sizeof(*status));
		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
						       nvme_completion_poll_cb,
						       status);
		if (res) {
			free(status);
			return res;
		}

		if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
			NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n");
			if (!status->timed_out) {
				free(status);
			}
			return -ENXIO;
		}
		p += transfer;
		offset += transfer;
		size_remaining -= transfer;
	}

	/* Firmware commit */
	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = commit_action;

	memset(status, 0, sizeof(*status));
	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
				       status);
	if (res) {
		free(status);
		return res;
	}

	res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock);

	memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status));

	if (!status->timed_out) {
		free(status);
	}

	if (res) {
		if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
		    completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
			if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
			    completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
				NVME_CTRLR_NOTICELOG(ctrlr,
						     "firmware activation requires a conventional reset to be performed!\n");
			} else {
				NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
			}
			return -ENXIO;
		}
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}
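
/*
 * Illustrative usage sketch (not part of the driver): a firmware image is
 * downloaded in min_page_size chunks and then committed, as implemented above.
 * The buffer must be DMA-able and the image size a multiple of 4 bytes;
 * "fw_size" and "slot" are hypothetical values supplied by the caller.
 *
 *	struct spdk_nvme_status fw_status;
 *	void *fw_buf = spdk_zmalloc(fw_size, 4096, NULL, SPDK_ENV_SOCKET_ID_ANY,
 *				    SPDK_MALLOC_DMA);
 *
 *	// ... read the firmware image into fw_buf ...
 *	rc = spdk_nvme_ctrlr_update_firmware(ctrlr, fw_buf, fw_size, slot,
 *					     SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG,
 *					     &fw_status);
 *	// On success the controller has already been reset to activate the image.
 */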

int
spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc, size;
	union spdk_nvme_cmbsz_register cmbsz;

	cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr);

	if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) {
		return -ENOTSUP;
	}

	size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4));

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_reserve_cmb(ctrlr);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	if (rc < 0) {
		return rc;
	}

	return size;
}

void *
spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	void *buf;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	buf = nvme_transport_ctrlr_map_cmb(ctrlr, size);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return buf;
}

void
spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	nvme_transport_ctrlr_unmap_cmb(ctrlr);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}
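
/*
 * Illustrative note and usage sketch (not part of the driver): CMBSZ.SZU
 * selects the size granularity (0 = 4 KiB, 1 = 64 KiB, 2 = 1 MiB, ...), so
 * e.g. SZ = 16 with SZU = 2 works out to 16 * (0x1000 << 8) = 16 MiB in the
 * calculation above.  A typical caller reserves the CMB and then maps it:
 *
 *	size_t cmb_size;
 *	void *cmb;
 *
 *	if (spdk_nvme_ctrlr_reserve_cmb(ctrlr) >= 0) {
 *		cmb = spdk_nvme_ctrlr_map_cmb(ctrlr, &cmb_size);
 *		if (cmb != NULL) {
 *			// ... use the buffer, then ...
 *			spdk_nvme_ctrlr_unmap_cmb(ctrlr);
 *		}
 *	}
 */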

int
spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_enable_pmr(ctrlr);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return rc;
}

int
spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_disable_pmr(ctrlr);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return rc;
}

void *
spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	void *buf;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	buf = nvme_transport_ctrlr_map_pmr(ctrlr, size);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return buf;
}

int
spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	rc = nvme_transport_ctrlr_unmap_pmr(ctrlr);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

	return rc;
}

bool
spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr);

	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
			strlen(SPDK_NVMF_DISCOVERY_NQN));
}

bool
spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr);

	/* We always define non-fabrics trtypes outside of the 8-bit range
	 * of NVMe-oF trtype.
	 */
	return ctrlr->trid.trtype < UINT8_MAX;
}

int
spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
{
	struct nvme_completion_poll_status *status;
	int res;

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
			nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}
	free(status);

	return 0;
}

int
spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
{
	struct nvme_completion_poll_status *status;
	int res;

	status = calloc(1, sizeof(*status));
	if (!status) {
		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
		return -ENOMEM;
	}

	res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size,
						nvme_completion_poll_cb, status);
	if (res) {
		free(status);
		return res;
	}
	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n");
		if (!status->timed_out) {
			free(status);
		}
		return -ENXIO;
	}

	free(status);

	return 0;
}
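
/*
 * Illustrative usage sketch (not part of the driver): both security helpers
 * issue a synchronous admin command against a DMA-able payload buffer.  The
 * secp/spsp/nssf values below are placeholders; the real values are defined
 * by the security protocol in use (TCG Opal, sanitize, etc.).
 *
 *	uint8_t *sec_buf = spdk_zmalloc(512, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
 *					SPDK_MALLOC_DMA);
 *
 *	rc = spdk_nvme_ctrlr_security_receive(ctrlr, 0x01, 0x0001, 0,
 *					      sec_buf, 512);
 */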

uint64_t
spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->flags;
}

const struct spdk_nvme_transport_id *
spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->trid;
}

int32_t
spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t qid;

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
	if (qid > ctrlr->opts.num_io_queues) {
		NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n");
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		return -1;
	}

	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return qid;
}

void
spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid)
{
	assert(qid <= ctrlr->opts.num_io_queues);

	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	spdk_bit_array_set(ctrlr->free_io_qids, qid);
	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
}

static int
nvme_cmd_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs,
		  uint32_t max_iovcnt, uint32_t len, size_t mps,
		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
{
	uint64_t prp1, prp2;
	void *vva;
	uint32_t i;
	uint32_t residue_len, nents;
	uint64_t *prp_list;
	uint32_t iovcnt;

	assert(max_iovcnt > 0);

	prp1 = cmd->dptr.prp.prp1;
	prp2 = cmd->dptr.prp.prp2;

	/* PRP1 may start at an unaligned page address */
	residue_len = mps - (prp1 % mps);
	residue_len = spdk_min(len, residue_len);

	vva = gpa_to_vva(prv, prp1, residue_len);
	if (spdk_unlikely(vva == NULL)) {
		SPDK_ERRLOG("GPA to VVA failed\n");
		return -EINVAL;
	}
	len -= residue_len;
	if (len && max_iovcnt < 2) {
		SPDK_ERRLOG("Too many page entries, at least two iovs are required\n");
		return -ERANGE;
	}
	iovs[0].iov_base = vva;
	iovs[0].iov_len = residue_len;

	if (len) {
		if (spdk_unlikely(prp2 == 0)) {
			SPDK_ERRLOG("no PRP2, %d remaining\n", len);
			return -EINVAL;
		}

		if (len <= mps) {
			/* 2 PRP used */
			iovcnt = 2;
			vva = gpa_to_vva(prv, prp2, len);
			if (spdk_unlikely(vva == NULL)) {
				SPDK_ERRLOG("no VVA for %#" PRIx64 ", len=%#x\n",
					    prp2, len);
				return -EINVAL;
			}
			iovs[1].iov_base = vva;
			iovs[1].iov_len = len;
		} else {
			/* PRP list used */
			nents = (len + mps - 1) / mps;
			if (spdk_unlikely(nents + 1 > max_iovcnt)) {
				SPDK_ERRLOG("Too many page entries\n");
				return -ERANGE;
			}

			vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list));
			if (spdk_unlikely(vva == NULL)) {
				SPDK_ERRLOG("no VVA for %#" PRIx64 ", nents=%#x\n",
					    prp2, nents);
				return -EINVAL;
			}
			prp_list = vva;
			i = 0;
			while (len != 0) {
				residue_len = spdk_min(len, mps);
				vva = gpa_to_vva(prv, prp_list[i], residue_len);
				if (spdk_unlikely(vva == NULL)) {
					SPDK_ERRLOG("no VVA for %#" PRIx64 ", residue_len=%#x\n",
						    prp_list[i], residue_len);
					return -EINVAL;
				}
				iovs[i + 1].iov_base = vva;
				iovs[i + 1].iov_len = residue_len;
				len -= residue_len;
				i++;
			}
			iovcnt = i + 1;
		}
	} else {
		/* 1 PRP used */
		iovcnt = 1;
	}

	assert(iovcnt <= max_iovcnt);
	return iovcnt;
}
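
/*
 * Illustrative worked example (not part of the driver): with mps = 4096,
 * len = 10000 and PRP1 pointing 512 bytes into a page, the first iov covers
 * the 3584-byte remainder of that page.  The 6416 bytes left exceed one page,
 * so PRP2 is treated as a PRP list with nents = (6416 + 4095) / 4096 = 2
 * entries, and the function returns iovcnt = 3.
 */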

static int
nvme_cmd_map_sgls_data(void *prv, struct spdk_nvme_sgl_descriptor *sgls, uint32_t num_sgls,
		       struct iovec *iovs, uint32_t max_iovcnt,
		       void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
{
	uint32_t i;
	void *vva;

	if (spdk_unlikely(max_iovcnt < num_sgls)) {
		return -ERANGE;
	}

	for (i = 0; i < num_sgls; i++) {
		if (spdk_unlikely(sgls[i].unkeyed.type != SPDK_NVME_SGL_TYPE_DATA_BLOCK)) {
			SPDK_ERRLOG("Invalid SGL type %u\n", sgls[i].unkeyed.type);
			return -EINVAL;
		}
		vva = gpa_to_vva(prv, sgls[i].address, sgls[i].unkeyed.length);
		if (spdk_unlikely(vva == NULL)) {
			SPDK_ERRLOG("GPA to VVA failed\n");
			return -EINVAL;
		}
		iovs[i].iov_base = vva;
		iovs[i].iov_len = sgls[i].unkeyed.length;
	}

	return num_sgls;
}

static int
nvme_cmd_map_sgls(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt,
		  uint32_t len, size_t mps,
		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
{
	struct spdk_nvme_sgl_descriptor *sgl, *last_sgl;
	uint32_t num_sgls, seg_len;
	void *vva;
	int ret;
	uint32_t total_iovcnt = 0;

	/* SGL cases */
	sgl = &cmd->dptr.sgl1;

	/* only one SGL segment */
	if (sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
		assert(max_iovcnt > 0);
		vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length);
		if (spdk_unlikely(vva == NULL)) {
			SPDK_ERRLOG("GPA to VVA failed\n");
			return -EINVAL;
		}
		iovs[0].iov_base = vva;
		iovs[0].iov_len = sgl->unkeyed.length;
		assert(sgl->unkeyed.length == len);

		return 1;
	}

	for (;;) {
		if (spdk_unlikely((sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_SEGMENT) &&
				  (sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_LAST_SEGMENT))) {
			SPDK_ERRLOG("Invalid SGL type %u\n", sgl->unkeyed.type);
			return -EINVAL;
		}

		seg_len = sgl->unkeyed.length;
		if (spdk_unlikely(seg_len % sizeof(struct spdk_nvme_sgl_descriptor))) {
			SPDK_ERRLOG("Invalid SGL segment len %u\n", seg_len);
			return -EINVAL;
		}

		num_sgls = seg_len / sizeof(struct spdk_nvme_sgl_descriptor);
		vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length);
		if (spdk_unlikely(vva == NULL)) {
			SPDK_ERRLOG("GPA to VVA failed\n");
			return -EINVAL;
		}

		/* sgl points to the first descriptor of this segment */
		sgl = (struct spdk_nvme_sgl_descriptor *)vva;
		last_sgl = &sgl[num_sgls - 1];

		/* we are done */
		if (last_sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
			/* map whole sgl list */
			ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls, &iovs[total_iovcnt],
						     max_iovcnt - total_iovcnt, gpa_to_vva);
			if (spdk_unlikely(ret < 0)) {
				return ret;
			}
			total_iovcnt += ret;

			return total_iovcnt;
		}

		if (num_sgls > 1) {
			/* map whole sgl excluding last_sgl */
			ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls - 1, &iovs[total_iovcnt],
						     max_iovcnt - total_iovcnt, gpa_to_vva);
			if (spdk_unlikely(ret < 0)) {
				return ret;
			}
			total_iovcnt += ret;
		}

		/* move to next level's segments */
		sgl = last_sgl;
	}

	return 0;
}

/* FIXME need to specify max number of iovs */
int
spdk_nvme_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs,
		   uint32_t len, size_t mps,
		   void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
{
	if (cmd->psdt == SPDK_NVME_PSDT_PRP) {
		return nvme_cmd_map_prps(prv, cmd, iovs, UINT32_MAX, len, mps, gpa_to_vva);
	}

	return -EINVAL;
}

int
spdk_nvme_map_cmd(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt,
		  uint32_t len, size_t mps,
		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
{
	if (cmd->psdt == SPDK_NVME_PSDT_PRP) {
		return nvme_cmd_map_prps(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva);
	}

	return nvme_cmd_map_sgls(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva);
}
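
/*
 * Illustrative usage sketch (not part of the driver): spdk_nvme_map_cmd() is
 * meant for code that emulates an NVMe controller (e.g. a vfio-user style
 * target) and needs to translate a command's PRPs or SGLs into local iovecs.
 * The identity translation below assumes guest physical addresses are already
 * valid pointers in this process, which only holds in a contrived test setup;
 * "cmd" and "payload_len" are the caller's own variables.
 *
 *	static void *
 *	identity_gpa_to_vva(void *prv, uint64_t addr, uint64_t len)
 *	{
 *		return (void *)(uintptr_t)addr;
 *	}
 *
 *	struct iovec iovs[32];
 *	int iovcnt = spdk_nvme_map_cmd(NULL, &cmd, iovs, SPDK_COUNTOF(iovs),
 *				       payload_len, 4096, identity_gpa_to_vva);
 */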