/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation.
 * All rights reserved.
 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "event_nvmf.h"

#include "spdk/bdev.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk_internal/usdt.h"

enum nvmf_tgt_state {
	NVMF_TGT_INIT_NONE = 0,
	NVMF_TGT_INIT_CREATE_TARGET,
	NVMF_TGT_INIT_CREATE_POLL_GROUPS,
	NVMF_TGT_INIT_START_SUBSYSTEMS,
	NVMF_TGT_RUNNING,
	NVMF_TGT_FINI_STOP_SUBSYSTEMS,
	NVMF_TGT_FINI_DESTROY_SUBSYSTEMS,
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
	NVMF_TGT_FINI_DESTROY_TARGET,
	NVMF_TGT_STOPPED,
	NVMF_TGT_ERROR,
};

struct nvmf_tgt_poll_group {
	struct spdk_nvmf_poll_group *group;
	struct spdk_thread *thread;
	TAILQ_ENTRY(nvmf_tgt_poll_group) link;
};

struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf = {
	.admin_passthru.identify_ctrlr = false
};

struct spdk_cpuset *g_poll_groups_mask = NULL;
struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
uint32_t g_spdk_nvmf_tgt_max_subsystems = 0;
uint16_t g_spdk_nvmf_tgt_crdt[3] = {0, 0, 0};

static enum nvmf_tgt_state g_tgt_state;

static struct spdk_thread *g_tgt_init_thread = NULL;
static struct spdk_thread *g_tgt_fini_thread = NULL;

static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
static size_t g_num_poll_groups = 0;

static void nvmf_tgt_advance_state(void);

static void
nvmf_shutdown_cb(void *arg1)
{
	/* Still in initialization state, defer the shutdown operation */
	if (g_tgt_state < NVMF_TGT_RUNNING) {
		spdk_thread_send_msg(spdk_get_thread(), nvmf_shutdown_cb, NULL);
		return;
	} else if (g_tgt_state != NVMF_TGT_RUNNING && g_tgt_state != NVMF_TGT_ERROR) {
		/* Already in a shutdown state, ignore the signal */
		return;
	}

	if (g_tgt_state == NVMF_TGT_ERROR) {
		/* An error occurred while parsing the configuration */
		g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
	} else {
		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
	}
	nvmf_tgt_advance_state();
}

static void
nvmf_subsystem_fini(void)
{
	nvmf_shutdown_cb(NULL);
}

static void
_nvmf_tgt_destroy_poll_group_done(void *ctx)
{
	assert(g_num_poll_groups > 0);

	if (--g_num_poll_groups == 0) {
		g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
		nvmf_tgt_advance_state();
	}
}

static void
nvmf_tgt_destroy_poll_group_done(void *cb_arg, int status)
{
	struct nvmf_tgt_poll_group *pg = cb_arg;

	free(pg);

	spdk_thread_send_msg(g_tgt_fini_thread, _nvmf_tgt_destroy_poll_group_done, NULL);

	spdk_thread_exit(spdk_get_thread());
}

static void
nvmf_tgt_destroy_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *pg = ctx;

	spdk_nvmf_poll_group_destroy(pg->group, nvmf_tgt_destroy_poll_group_done, pg);
}

static void
nvmf_tgt_destroy_poll_groups(void)
{
	struct nvmf_tgt_poll_group *pg, *tpg;

	g_tgt_fini_thread = spdk_get_thread();
	assert(g_tgt_fini_thread != NULL);

	TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
		TAILQ_REMOVE(&g_poll_groups, pg, link);
		spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg);
	}
}

static uint32_t
nvmf_get_cpuset_count(void)
{
	if (g_poll_groups_mask) {
		return spdk_cpuset_count(g_poll_groups_mask);
	} else {
		return spdk_env_get_core_count();
	}
}

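/*
 * Poll group creation fans out from the init thread: nvmf_tgt_create_poll_groups()
 * spawns one SPDK thread per selected core and sends each a message to run
 * nvmf_tgt_create_poll_group(). Each thread reports back to the init thread via
 * nvmf_tgt_create_poll_group_done(); once g_num_poll_groups reaches
 * nvmf_get_cpuset_count(), the state machine advances (to
 * NVMF_TGT_INIT_START_SUBSYSTEMS on success, or toward teardown if any
 * creation failed).
 */
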
static void
nvmf_tgt_create_poll_group_done(void *ctx)
{
	struct nvmf_tgt_poll_group *pg = ctx;

	assert(pg);

	if (!pg->group) {
		SPDK_ERRLOG("Failed to create nvmf poll group\n");
		/* Change the state to error but wait for completions from all other threads */
		g_tgt_state = NVMF_TGT_ERROR;
	}

	TAILQ_INSERT_TAIL(&g_poll_groups, pg, link);

	assert(g_num_poll_groups < nvmf_get_cpuset_count());

	if (++g_num_poll_groups == nvmf_get_cpuset_count()) {
		if (g_tgt_state != NVMF_TGT_ERROR) {
			g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
		}
		nvmf_tgt_advance_state();
	}
}

static void
nvmf_tgt_create_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *pg;

	pg = calloc(1, sizeof(*pg));
	if (!pg) {
		SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
		g_tgt_state = NVMF_TGT_ERROR;
		nvmf_tgt_advance_state();
		return;
	}

	pg->thread = spdk_get_thread();
	pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);

	spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_done, pg);
}

static void
nvmf_tgt_create_poll_groups(void)
{
	uint32_t cpu, count = 0;
	char thread_name[32];
	struct spdk_thread *thread;

	g_tgt_init_thread = spdk_get_thread();
	assert(g_tgt_init_thread != NULL);

	SPDK_ENV_FOREACH_CORE(cpu) {
		if (g_poll_groups_mask && !spdk_cpuset_get_cpu(g_poll_groups_mask, cpu)) {
			continue;
		}
		snprintf(thread_name, sizeof(thread_name), "nvmf_tgt_poll_group_%u", count++);

		thread = spdk_thread_create(thread_name, g_poll_groups_mask);
		assert(thread != NULL);

		spdk_thread_send_msg(thread, nvmf_tgt_create_poll_group, NULL);
	}
}

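/*
 * Subsystems are started and stopped one at a time: each completion callback
 * below advances to the next subsystem in the target's list, so the chain
 * walks the whole list asynchronously. The target only transitions to
 * NVMF_TGT_RUNNING (or NVMF_TGT_FINI_DESTROY_SUBSYSTEMS on the stop path)
 * after the last subsystem has completed.
 */
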
static void
nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);

	if (subsystem) {
		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
		if (rc) {
			g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
			SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
			nvmf_tgt_advance_state();
		}
		return;
	}

	g_tgt_state = NVMF_TGT_RUNNING;
	nvmf_tgt_advance_state();
}

static void
nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);

	if (subsystem) {
		rc = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
		if (rc) {
			SPDK_ERRLOG("Unable to stop NVMe-oF subsystem %s with rc %d. Trying others.\n",
				    spdk_nvmf_subsystem_get_nqn(subsystem), rc);
			nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
		}
		return;
	}

	g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
	nvmf_tgt_advance_state();
}

static void
_nvmf_tgt_subsystem_destroy(void *cb_arg)
{
	struct spdk_nvmf_subsystem *subsystem, *next_subsystem;
	int rc;

	subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

	while (subsystem != NULL) {
		next_subsystem = spdk_nvmf_subsystem_get_next(subsystem);
		rc = spdk_nvmf_subsystem_destroy(subsystem, _nvmf_tgt_subsystem_destroy, NULL);
		if (rc) {
			if (rc == -EINPROGRESS) {
				/* -EINPROGRESS means the subsystem is being destroyed asynchronously;
				 * _nvmf_tgt_subsystem_destroy is registered as the completion callback
				 * and will resume destroying the remaining subsystems, if any. */
				return;
			} else {
				SPDK_ERRLOG("Unable to destroy subsystem %s, rc %d. Trying others.\n",
					    spdk_nvmf_subsystem_get_nqn(subsystem), rc);
			}
		}
		subsystem = next_subsystem;
	}

	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
	nvmf_tgt_advance_state();
}

static void
nvmf_tgt_destroy_done(void *ctx, int status)
{
	g_tgt_state = NVMF_TGT_STOPPED;

	nvmf_tgt_advance_state();
}

static int
nvmf_add_discovery_subsystem(void)
{
	struct spdk_nvmf_subsystem *subsystem;

	subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN,
					       SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT, 0);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
		return -1;
	}

	spdk_nvmf_subsystem_set_allow_any_host(subsystem, true);

	return 0;
}

static int
nvmf_tgt_create_target(void)
{
	struct spdk_nvmf_target_opts opts = {
		.name = "nvmf_tgt"
	};

	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
	opts.crdt[0] = g_spdk_nvmf_tgt_crdt[0];
	opts.crdt[1] = g_spdk_nvmf_tgt_crdt[1];
	opts.crdt[2] = g_spdk_nvmf_tgt_crdt[2];
	opts.discovery_filter = g_spdk_nvmf_tgt_conf.discovery_filter;
	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
	if (!g_spdk_nvmf_tgt) {
		SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n");
		return -1;
	}

	if (nvmf_add_discovery_subsystem() != 0) {
		SPDK_ERRLOG("nvmf_add_discovery_subsystem failed\n");
		return -1;
	}

	return 0;
}

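/*
 * Completion callback passed to spdk_nvmf_bdev_ctrlr_nvme_passthru_admin() by
 * nvmf_custom_identify_hdlr() below. When it runs, the request buffer holds the
 * Identify Controller data returned by the backing NVMe drive. The NVMF
 * controller's own identify data is generated, patched with the drive's SN, MN,
 * FR, and IEEE fields, and copied back into the response buffer.
 */
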
static void
fixup_identify_ctrlr(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_ctrlr_data nvme_cdata = {};
	struct spdk_nvme_ctrlr_data nvmf_cdata = {};
	struct spdk_nvmf_ctrlr *ctrlr = spdk_nvmf_request_get_ctrlr(req);
	struct spdk_nvme_cpl *rsp = spdk_nvmf_request_get_response(req);
	size_t datalen;
	int rc;

	/* This is the identify data from the NVMe drive */
	datalen = spdk_nvmf_request_copy_to_buf(req, &nvme_cdata,
						sizeof(nvme_cdata));

	/* Get the NVMF identify data */
	rc = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, &nvmf_cdata);
	if (rc != SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return;
	}

	/* Fixup NVMF identify data with NVMe identify data */

	/* Serial Number (SN) */
	memcpy(&nvmf_cdata.sn[0], &nvme_cdata.sn[0], sizeof(nvmf_cdata.sn));
	/* Model Number (MN) */
	memcpy(&nvmf_cdata.mn[0], &nvme_cdata.mn[0], sizeof(nvmf_cdata.mn));
	/* Firmware Revision (FR) */
	memcpy(&nvmf_cdata.fr[0], &nvme_cdata.fr[0], sizeof(nvmf_cdata.fr));
	/* IEEE OUI Identifier (IEEE) */
	memcpy(&nvmf_cdata.ieee[0], &nvme_cdata.ieee[0], sizeof(nvmf_cdata.ieee));
	/* FRU Globally Unique Identifier (FGUID) */

	/* Copy the fixed-up data back to the response */
	spdk_nvmf_request_copy_from_buf(req, &nvmf_cdata, datalen);
}

static int
nvmf_custom_identify_hdlr(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_subsystem *subsys;
	int rc;

	if (cmd->cdw10_bits.identify.cns != SPDK_NVME_IDENTIFY_CTRLR) {
		return -1; /* continue */
	}

	subsys = spdk_nvmf_request_get_subsystem(req);
	if (subsys == NULL) {
		return -1;
	}

	/* Only process this request if the subsystem has exactly one namespace */
	if (spdk_nvmf_subsystem_get_max_nsid(subsys) != 1) {
		return -1;
	}

	/* Forward to the first namespace if it supports NVMe admin commands */
	rc = spdk_nvmf_request_get_bdev(1, req, &bdev, &desc, &ch);
	if (rc) {
		/* No bdev found for this namespace. Continue. */
		return -1;
	}

	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
		return -1;
	}

	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, fixup_identify_ctrlr);
}

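/*
 * Drive the target state machine. Synchronous states fall through the loop
 * until the state stops changing; asynchronous states (subsystem start/stop/
 * destroy, poll group and target teardown) return early and re-enter this
 * function from their completion callbacks.
 */
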
static void
nvmf_tgt_advance_state(void)
{
	enum nvmf_tgt_state prev_state;
	int rc = -1;
	int ret;

	do {
		SPDK_DTRACE_PROBE1(nvmf_tgt_state, g_tgt_state);
		prev_state = g_tgt_state;

		switch (g_tgt_state) {
		case NVMF_TGT_INIT_NONE: {
			g_tgt_state = NVMF_TGT_INIT_CREATE_TARGET;
			break;
		}
		case NVMF_TGT_INIT_CREATE_TARGET:
			ret = nvmf_tgt_create_target();
			g_tgt_state = (ret == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
			break;
		case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
			if (g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr) {
				SPDK_NOTICELOG("Custom identify ctrlr handler enabled\n");
				spdk_nvmf_set_custom_admin_cmd_hdlr(SPDK_NVME_OPC_IDENTIFY, nvmf_custom_identify_hdlr);
			}
			/* Create a poll group thread per selected core and send each thread
			 * a message to create its poll group.
			 */
			nvmf_tgt_create_poll_groups();
			break;
		case NVMF_TGT_INIT_START_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
				if (ret) {
					SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
					g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
				}
			} else {
				g_tgt_state = NVMF_TGT_RUNNING;
			}
			break;
		}
		case NVMF_TGT_RUNNING:
			spdk_subsystem_init_next(0);
			break;
		case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
				if (ret) {
					nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
				}
			} else {
				g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
			}
			break;
		}
		case NVMF_TGT_FINI_DESTROY_SUBSYSTEMS:
			_nvmf_tgt_subsystem_destroy(NULL);
			/* The function above can complete asynchronously; it calls
			 * nvmf_tgt_advance_state() itself once done, so just return here. */
			return;
		case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
			/* Send a message to each poll group thread, then terminate the thread */
			nvmf_tgt_destroy_poll_groups();
			break;
		case NVMF_TGT_FINI_DESTROY_TARGET:
			spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
			break;
		case NVMF_TGT_STOPPED:
			spdk_subsystem_fini_next();
			return;
		case NVMF_TGT_ERROR:
			spdk_subsystem_init_next(rc);
			return;
		}

	} while (g_tgt_state != prev_state);
}

static void
nvmf_subsystem_init(void)
{
	g_tgt_state = NVMF_TGT_INIT_NONE;
	nvmf_tgt_advance_state();
}

static void
nvmf_subsystem_dump_discover_filter(struct spdk_json_write_ctx *w)
{
	static char const *const answers[] = {
		"match_any",
		"transport",
		"address",
		"transport,address",
		"svcid",
		"transport,svcid",
		"address,svcid",
		"transport,address,svcid"
	};

	if ((g_spdk_nvmf_tgt_conf.discovery_filter & ~(SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_TYPE |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_ADDRESS |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_SVCID)) != 0) {
		SPDK_ERRLOG("Incorrect discovery filter %d\n", g_spdk_nvmf_tgt_conf.discovery_filter);
		assert(0);
		return;
	}

	spdk_json_write_named_string(w, "discovery_filter", answers[g_spdk_nvmf_tgt_conf.discovery_filter]);
}

static void
nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_array_begin(w);

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "nvmf_set_config");

	spdk_json_write_named_object_begin(w, "params");
	nvmf_subsystem_dump_discover_filter(w);
	spdk_json_write_named_object_begin(w, "admin_cmd_passthru");
	spdk_json_write_named_bool(w, "identify_ctrlr",
				   g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr);
	spdk_json_write_object_end(w);
	if (g_poll_groups_mask) {
		spdk_json_write_named_string(w, "poll_groups_mask", spdk_cpuset_fmt(g_poll_groups_mask));
	}
	spdk_json_write_object_end(w);
	spdk_json_write_object_end(w);

	spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
	spdk_json_write_array_end(w);
}

static struct spdk_subsystem g_spdk_subsystem_nvmf = {
	.name = "nvmf",
	.init = nvmf_subsystem_init,
	.fini = nvmf_subsystem_fini,
	.write_config_json = nvmf_subsystem_write_config_json,
};

SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)
SPDK_SUBSYSTEM_DEPEND(nvmf, sock)