/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "event_nvmf.h"

#include "spdk/bdev.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk_internal/usdt.h"

enum nvmf_tgt_state {
	NVMF_TGT_INIT_NONE = 0,
	NVMF_TGT_INIT_CREATE_TARGET,
	NVMF_TGT_INIT_CREATE_POLL_GROUPS,
	NVMF_TGT_INIT_START_SUBSYSTEMS,
	NVMF_TGT_RUNNING,
	NVMF_TGT_FINI_STOP_SUBSYSTEMS,
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
	NVMF_TGT_FINI_FREE_RESOURCES,
	NVMF_TGT_STOPPED,
	NVMF_TGT_ERROR,
};

struct nvmf_tgt_poll_group {
	struct spdk_nvmf_poll_group *group;
	struct spdk_thread *thread;
	TAILQ_ENTRY(nvmf_tgt_poll_group) link;
};

struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf = {
	.acceptor_poll_rate = ACCEPT_TIMEOUT_US,
	.admin_passthru.identify_ctrlr = false
};

struct spdk_cpuset *g_poll_groups_mask = NULL;
struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
uint32_t g_spdk_nvmf_tgt_max_subsystems = 0;
uint16_t g_spdk_nvmf_tgt_crdt[3] = {0, 0, 0};

static enum nvmf_tgt_state g_tgt_state;

static struct spdk_thread *g_tgt_init_thread = NULL;
static struct spdk_thread *g_tgt_fini_thread = NULL;

static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
static size_t g_num_poll_groups = 0;

static void nvmf_tgt_advance_state(void);
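/*
 * Shutdown entry point. If initialization has not finished yet, the callback
 * re-queues itself until the target reaches a state where teardown is safe;
 * repeated signals after teardown has begun are ignored.
 */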
static void
nvmf_shutdown_cb(void *arg1)
{
	/* Still in initialization state, defer shutdown operation */
	if (g_tgt_state < NVMF_TGT_RUNNING) {
		spdk_thread_send_msg(spdk_get_thread(), nvmf_shutdown_cb, NULL);
		return;
	} else if (g_tgt_state != NVMF_TGT_RUNNING && g_tgt_state != NVMF_TGT_ERROR) {
		/* Already in shutdown, ignore the signal */
		return;
	}

	if (g_tgt_state == NVMF_TGT_ERROR) {
		/* Initialization failed; skip subsystem shutdown and free resources directly */
		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
	} else {
		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
	}
	nvmf_tgt_advance_state();
}

static void
nvmf_subsystem_fini(void)
{
	nvmf_shutdown_cb(NULL);
}

static void
_nvmf_tgt_destroy_poll_group_done(void *ctx)
{
	assert(g_num_poll_groups > 0);

	if (--g_num_poll_groups == 0) {
		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
		nvmf_tgt_advance_state();
	}
}

static void
nvmf_tgt_destroy_poll_group_done(void *cb_arg, int status)
{
	struct nvmf_tgt_poll_group *pg = cb_arg;

	free(pg);

	spdk_thread_send_msg(g_tgt_fini_thread, _nvmf_tgt_destroy_poll_group_done, NULL);

	spdk_thread_exit(spdk_get_thread());
}

static void
nvmf_tgt_destroy_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *pg = ctx;

	spdk_nvmf_poll_group_destroy(pg->group, nvmf_tgt_destroy_poll_group_done, pg);
}

static void
nvmf_tgt_destroy_poll_groups(void)
{
	struct nvmf_tgt_poll_group *pg, *tpg;

	g_tgt_fini_thread = spdk_get_thread();
	assert(g_tgt_fini_thread != NULL);

	TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
		TAILQ_REMOVE(&g_poll_groups, pg, link);
		spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg);
	}
}

static uint32_t
nvmf_get_cpuset_count(void)
{
	if (g_poll_groups_mask) {
		return spdk_cpuset_count(g_poll_groups_mask);
	} else {
		return spdk_env_get_core_count();
	}
}

static void
nvmf_tgt_create_poll_group_done(void *ctx)
{
	struct nvmf_tgt_poll_group *pg = ctx;

	assert(pg);

	if (!pg->group) {
		SPDK_ERRLOG("Failed to create nvmf poll group\n");
		/* Change the state to error but wait for completions from all other threads */
		g_tgt_state = NVMF_TGT_ERROR;
	}

	TAILQ_INSERT_TAIL(&g_poll_groups, pg, link);

	assert(g_num_poll_groups < nvmf_get_cpuset_count());

	if (++g_num_poll_groups == nvmf_get_cpuset_count()) {
		if (g_tgt_state != NVMF_TGT_ERROR) {
			g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
		}
		nvmf_tgt_advance_state();
	}
}

static void
nvmf_tgt_create_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *pg;

	pg = calloc(1, sizeof(*pg));
	if (!pg) {
		SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
		g_tgt_state = NVMF_TGT_ERROR;
		nvmf_tgt_advance_state();
		return;
	}

	pg->thread = spdk_get_thread();
	pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);

	spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_done, pg);
}

static void
nvmf_tgt_create_poll_groups(void)
{
	uint32_t i;
	char thread_name[32];
	struct spdk_thread *thread;

	g_tgt_init_thread = spdk_get_thread();
	assert(g_tgt_init_thread != NULL);

	SPDK_ENV_FOREACH_CORE(i) {
		if (g_poll_groups_mask && !spdk_cpuset_get_cpu(g_poll_groups_mask, i)) {
			continue;
		}
		snprintf(thread_name, sizeof(thread_name), "nvmf_tgt_poll_group_%u", i);

		thread = spdk_thread_create(thread_name, g_poll_groups_mask);
		assert(thread != NULL);

		spdk_thread_send_msg(thread, nvmf_tgt_create_poll_group, NULL);
	}
}
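/*
 * Callback-driven iteration over the subsystem list: each completed start
 * schedules the start of the next subsystem, and once the list is exhausted
 * the target transitions to NVMF_TGT_RUNNING.
 */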
static void
nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);

	if (subsystem) {
		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
		if (rc) {
			g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
			SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
			nvmf_tgt_advance_state();
		}
		return;
	}

	g_tgt_state = NVMF_TGT_RUNNING;
	nvmf_tgt_advance_state();
}

static void
nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);

	if (subsystem) {
		rc = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
		if (rc) {
			SPDK_ERRLOG("Unable to stop NVMe-oF subsystem. Trying others.\n");
			nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
		}
		return;
	}

	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
	nvmf_tgt_advance_state();
}

static void
nvmf_tgt_destroy_done(void *ctx, int status)
{
	g_tgt_state = NVMF_TGT_STOPPED;

	nvmf_tgt_advance_state();
}

static int
nvmf_add_discovery_subsystem(void)
{
	struct spdk_nvmf_subsystem *subsystem;

	subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN,
					       SPDK_NVMF_SUBTYPE_DISCOVERY, 0);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
		return -1;
	}

	spdk_nvmf_subsystem_set_allow_any_host(subsystem, true);

	return 0;
}

static int
nvmf_tgt_create_target(void)
{
	struct spdk_nvmf_target_opts opts = {
		.name = "nvmf_tgt"
	};

	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
	opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf.acceptor_poll_rate;
	opts.crdt[0] = g_spdk_nvmf_tgt_crdt[0];
	opts.crdt[1] = g_spdk_nvmf_tgt_crdt[1];
	opts.crdt[2] = g_spdk_nvmf_tgt_crdt[2];
	opts.discovery_filter = g_spdk_nvmf_tgt_conf.discovery_filter;
	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
	if (!g_spdk_nvmf_tgt) {
		SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n");
		return -1;
	}

	if (nvmf_add_discovery_subsystem() != 0) {
		SPDK_ERRLOG("nvmf_add_discovery_subsystem failed\n");
		return -1;
	}

	return 0;
}
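/*
 * Merge the Identify Controller data returned by the backing NVMe drive with
 * the data the NVMF layer reports for this controller: the drive's SN, MN,
 * FR, and IEEE fields are kept, while every other field comes from the NVMF
 * controller. The merged result overwrites the response buffer in place.
 */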
static void
fixup_identify_ctrlr(struct spdk_nvmf_request *req)
{
	uint32_t length;
	int rc;
	struct spdk_nvme_ctrlr_data *nvme_cdata;
	struct spdk_nvme_ctrlr_data nvmf_cdata = {};
	struct spdk_nvmf_ctrlr *ctrlr = spdk_nvmf_request_get_ctrlr(req);
	struct spdk_nvme_cpl *rsp = spdk_nvmf_request_get_response(req);

	/* This is the identify data from the NVMe drive */
	spdk_nvmf_request_get_data(req, (void **)&nvme_cdata, &length);

	/* Get the NVMF identify data */
	rc = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, &nvmf_cdata);
	if (rc != SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return;
	}

	/* Fixup NVMF identify data with NVMe identify data */

	/* Serial Number (SN) */
	memcpy(&nvmf_cdata.sn[0], &nvme_cdata->sn[0], sizeof(nvmf_cdata.sn));
	/* Model Number (MN) */
	memcpy(&nvmf_cdata.mn[0], &nvme_cdata->mn[0], sizeof(nvmf_cdata.mn));
	/* Firmware Revision (FR) */
	memcpy(&nvmf_cdata.fr[0], &nvme_cdata->fr[0], sizeof(nvmf_cdata.fr));
	/* IEEE OUI Identifier (IEEE) */
	memcpy(&nvmf_cdata.ieee[0], &nvme_cdata->ieee[0], sizeof(nvmf_cdata.ieee));
	/* FRU Globally Unique Identifier (FGUID) */

	/* Copy the fixed-up data back to the response */
	memcpy(nvme_cdata, &nvmf_cdata, length);
}

static int
nvmf_custom_identify_hdlr(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_subsystem *subsys;
	int rc;

	if (cmd->cdw10_bits.identify.cns != SPDK_NVME_IDENTIFY_CTRLR) {
		return -1; /* continue */
	}

	subsys = spdk_nvmf_request_get_subsystem(req);
	if (subsys == NULL) {
		return -1;
	}

	/* Only process this request if the subsystem has exactly one namespace */
	if (spdk_nvmf_subsystem_get_max_nsid(subsys) != 1) {
		return -1;
	}

	/* Forward to the first namespace if it supports NVMe admin commands */
	rc = spdk_nvmf_request_get_bdev(1, req, &bdev, &desc, &ch);
	if (rc) {
		/* No bdev found for this namespace. Continue. */
		return -1;
	}

	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
		return -1;
	}

	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, fixup_identify_ctrlr);
}
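/*
 * Drive the target state machine: loop until the state stops changing.
 * Asynchronous steps (poll group creation, subsystem start/stop, target
 * destruction) leave the state untouched here and re-enter this function
 * from their completion callbacks.
 */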
static void
nvmf_tgt_advance_state(void)
{
	enum nvmf_tgt_state prev_state;
	int rc = -1;
	int ret;

	do {
		SPDK_DTRACE_PROBE1(nvmf_tgt_state, g_tgt_state);
		prev_state = g_tgt_state;

		switch (g_tgt_state) {
		case NVMF_TGT_INIT_NONE: {
			g_tgt_state = NVMF_TGT_INIT_CREATE_TARGET;
			break;
		}
		case NVMF_TGT_INIT_CREATE_TARGET:
			ret = nvmf_tgt_create_target();
			g_tgt_state = (ret == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
			break;
		case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
			if (g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr) {
				SPDK_NOTICELOG("Custom identify ctrlr handler enabled\n");
				spdk_nvmf_set_custom_admin_cmd_hdlr(SPDK_NVME_OPC_IDENTIFY, nvmf_custom_identify_hdlr);
			}
			/* Create poll group threads, and send a message to each thread
			 * to create a poll group.
			 */
			nvmf_tgt_create_poll_groups();
			break;
		case NVMF_TGT_INIT_START_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
				if (ret) {
					SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
					g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
				}
			} else {
				g_tgt_state = NVMF_TGT_RUNNING;
			}
			break;
		}
		case NVMF_TGT_RUNNING:
			spdk_subsystem_init_next(0);
			break;
		case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
				if (ret) {
					nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
				}
			} else {
				g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
			}
			break;
		}
		case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
			/* Send a message to each poll group thread, and terminate the thread */
			nvmf_tgt_destroy_poll_groups();
			break;
		case NVMF_TGT_FINI_FREE_RESOURCES:
			spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
			break;
		case NVMF_TGT_STOPPED:
			spdk_subsystem_fini_next();
			return;
		case NVMF_TGT_ERROR:
			spdk_subsystem_init_next(rc);
			return;
		}

	} while (g_tgt_state != prev_state);
}

static void
nvmf_subsystem_init(void)
{
	g_tgt_state = NVMF_TGT_INIT_NONE;
	nvmf_tgt_advance_state();
}

static void
nvmf_subsystem_dump_discover_filter(struct spdk_json_write_ctx *w)
{
	static char const *const answers[] = {
		"match_any",
		"transport",
		"address",
		"transport,address",
		"svcid",
		"transport,svcid",
		"address,svcid",
		"transport,address,svcid"
	};

	if ((g_spdk_nvmf_tgt_conf.discovery_filter & ~(SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_TYPE |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_ADDRESS |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_SVCID)) != 0) {
		SPDK_ERRLOG("Incorrect discovery filter %d\n", g_spdk_nvmf_tgt_conf.discovery_filter);
		assert(0);
		return;
	}

	spdk_json_write_named_string(w, "discovery_filter", answers[g_spdk_nvmf_tgt_conf.discovery_filter]);
}

static void
nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_array_begin(w);

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "nvmf_set_config");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf.acceptor_poll_rate);
	nvmf_subsystem_dump_discover_filter(w);
	spdk_json_write_named_object_begin(w, "admin_cmd_passthru");
	spdk_json_write_named_bool(w, "identify_ctrlr",
				   g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr);
	spdk_json_write_object_end(w);
	if (g_poll_groups_mask) {
		spdk_json_write_named_string(w, "poll_groups_mask", spdk_cpuset_fmt(g_poll_groups_mask));
	}
	spdk_json_write_object_end(w);
	spdk_json_write_object_end(w);

	spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
	spdk_json_write_array_end(w);
}

static struct spdk_subsystem g_spdk_subsystem_nvmf = {
	.name = "nvmf",
	.init = nvmf_subsystem_init,
	.fini = nvmf_subsystem_fini,
	.write_config_json = nvmf_subsystem_write_config_json,
};

SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)
SPDK_SUBSYSTEM_DEPEND(nvmf, sock)