/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2019 Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/string.h"
#include "spdk/config.h"
#include "spdk/fd_group.h"
#include "spdk/log.h"
#include "spdk/nvme.h"

#define FUSE_USE_VERSION 31

#include <fuse3/cuse_lowlevel.h>

#include <linux/nvme_ioctl.h>
#include <linux/fs.h>

#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_cuse.h"

/*
 * State for one CUSE character device node: either a controller node
 * (nsid == 0) or one of its namespace nodes (nsid != 0).
 */
struct cuse_device {
	bool force_exit;	/* when set, the cuse thread frees this device after its session exits */
	char dev_name[128];	/* device node name passed to CUSE via DEVNAME= */
	uint32_t index;		/* controller index; also used to name the claim lock file */
	int claim_fd;		/* open fd holding the advisory claim lock (see nvme_cuse_claim) */
	char lock_name[64];	/* path of the claim lock file under /var/tmp */

	struct spdk_nvme_ctrlr *ctrlr;	/**< NVMe controller */
	uint32_t nsid;			/**< NVMe name space id, or 0 */

	struct fuse_session *session;	/* CUSE low-level session for this node */
	int fuse_efd;			/* pollable fd of the session, added to g_device_fdgrp */

	struct cuse_device *ctrlr_device;	/* parent controller device (namespace nodes only) */
	TAILQ_HEAD(, cuse_device) ns_devices;	/* child namespace devices (controller nodes only) */

	TAILQ_ENTRY(cuse_device) tailq;			/* link on g_ctrlr_ctx_head or ns_devices */
	TAILQ_ENTRY(cuse_device) cuse_thread_tailq;	/* link on g_pending/g_active device lists */
};

static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
static struct spdk_bit_array *g_ctrlr_started;

/* New sessions are queued on the pending list under g_pending_device_mtx and
 * the cuse thread is woken through g_cuse_thread_msg_fd (an eventfd); the
 * thread then moves them to the active list it polls via g_device_fdgrp. */
static pthread_mutex_t g_pending_device_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct spdk_fd_group *g_device_fdgrp;
static int g_cuse_thread_msg_fd;
static TAILQ_HEAD(, cuse_device) g_pending_device_head = TAILQ_HEAD_INITIALIZER(
			g_pending_device_head);
static TAILQ_HEAD(, cuse_device) g_active_device_head = TAILQ_HEAD_INITIALIZER(
			g_active_device_head);

/* Per-request context for an in-flight passthru or submit-io command. */
struct cuse_io_ctx {
	struct spdk_nvme_cmd nvme_cmd;			/* command forwarded to the controller */
	enum spdk_nvme_data_transfer data_transfer;	/* direction derived from the opcode */

	uint64_t lba;		/* starting LBA (submit-io path) */
	uint32_t lba_count;	/* number of LBAs (submit-io path) */
	uint16_t apptag;	/* application tag for end-to-end protection */
	uint16_t appmask;	/* application tag mask */

	void *data;		/* DMA-able data buffer, owned by this ctx */
	void *metadata;		/* DMA-able metadata buffer, owned by this ctx */

	int data_len;		/* length of data in bytes */
	int metadata_len;	/* length of metadata in bytes */

	fuse_req_t req;		/* FUSE request to reply to on completion */
};

/* Release the buffers and the context itself. */
static void
cuse_io_ctx_free(struct cuse_io_ctx *ctx)
{
	spdk_free(ctx->data);
	spdk_free(ctx->metadata);
	free(ctx);
}
80 81 #define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val) \ 82 if (out_bufsz == 0) { \ 83 struct iovec out_iov; \ 84 out_iov.iov_base = (void *)arg; \ 85 out_iov.iov_len = sizeof(val); \ 86 fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1); \ 87 return; \ 88 } 89 90 #define FUSE_MAX_SIZE 128*1024 91 92 static bool 93 fuse_check_req_size(fuse_req_t req, struct iovec iov[], int iovcnt) 94 { 95 int total_iov_len = 0; 96 for (int i = 0; i < iovcnt; i++) { 97 total_iov_len += iov[i].iov_len; 98 if (total_iov_len > FUSE_MAX_SIZE) { 99 fuse_reply_err(req, ENOMEM); 100 SPDK_ERRLOG("FUSE request cannot be larger that %d\n", FUSE_MAX_SIZE); 101 return false; 102 } 103 } 104 return true; 105 } 106 107 static void 108 cuse_nvme_passthru_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl) 109 { 110 struct cuse_io_ctx *ctx = arg; 111 struct iovec out_iov[3]; 112 struct spdk_nvme_cpl _cpl; 113 int out_iovcnt = 0; 114 uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */ 115 116 memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl)); 117 out_iov[out_iovcnt].iov_base = &_cpl.cdw0; 118 out_iov[out_iovcnt].iov_len = sizeof(_cpl.cdw0); 119 out_iovcnt += 1; 120 121 if (ctx->data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 122 if (ctx->data_len > 0) { 123 out_iov[out_iovcnt].iov_base = ctx->data; 124 out_iov[out_iovcnt].iov_len = ctx->data_len; 125 out_iovcnt += 1; 126 } 127 if (ctx->metadata_len > 0) { 128 out_iov[out_iovcnt].iov_base = ctx->metadata; 129 out_iov[out_iovcnt].iov_len = ctx->metadata_len; 130 out_iovcnt += 1; 131 } 132 } 133 134 fuse_reply_ioctl_iov(ctx->req, status_field, out_iov, out_iovcnt); 135 cuse_io_ctx_free(ctx); 136 } 137 138 static void 139 cuse_nvme_passthru_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 140 { 141 int rc; 142 struct cuse_io_ctx *ctx = arg; 143 144 if (nsid != 0) { 145 rc = spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, ctrlr->external_io_msgs_qpair, &ctx->nvme_cmd, 146 ctx->data, 147 ctx->data_len, 
ctx->metadata, cuse_nvme_passthru_cmd_cb, (void *)ctx); 148 } else { 149 rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len, 150 cuse_nvme_passthru_cmd_cb, (void *)ctx); 151 } 152 if (rc < 0) { 153 fuse_reply_err(ctx->req, EINVAL); 154 cuse_io_ctx_free(ctx); 155 } 156 } 157 158 static void 159 cuse_nvme_passthru_cmd_send(fuse_req_t req, struct nvme_passthru_cmd *passthru_cmd, 160 const void *data, const void *metadata, int cmd) 161 { 162 struct cuse_io_ctx *ctx; 163 struct cuse_device *cuse_device = fuse_req_userdata(req); 164 int rv; 165 166 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 167 if (!ctx) { 168 SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n"); 169 fuse_reply_err(req, ENOMEM); 170 return; 171 } 172 173 ctx->req = req; 174 ctx->data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode); 175 176 memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd)); 177 ctx->nvme_cmd.opc = passthru_cmd->opcode; 178 ctx->nvme_cmd.nsid = passthru_cmd->nsid; 179 ctx->nvme_cmd.cdw10 = passthru_cmd->cdw10; 180 ctx->nvme_cmd.cdw11 = passthru_cmd->cdw11; 181 ctx->nvme_cmd.cdw12 = passthru_cmd->cdw12; 182 ctx->nvme_cmd.cdw13 = passthru_cmd->cdw13; 183 ctx->nvme_cmd.cdw14 = passthru_cmd->cdw14; 184 ctx->nvme_cmd.cdw15 = passthru_cmd->cdw15; 185 186 ctx->data_len = passthru_cmd->data_len; 187 ctx->metadata_len = passthru_cmd->metadata_len; 188 189 if (ctx->data_len > 0) { 190 ctx->data = spdk_malloc(ctx->data_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 191 if (!ctx->data) { 192 SPDK_ERRLOG("Cannot allocate memory for data\n"); 193 fuse_reply_err(req, ENOMEM); 194 free(ctx); 195 return; 196 } 197 if (data != NULL) { 198 memcpy(ctx->data, data, ctx->data_len); 199 } 200 } 201 202 if (ctx->metadata_len > 0) { 203 ctx->metadata = spdk_malloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 204 if (!ctx->metadata) { 205 SPDK_ERRLOG("Cannot allocate memory for metadata\n"); 206 
fuse_reply_err(req, ENOMEM); 207 cuse_io_ctx_free(ctx); 208 return; 209 } 210 if (metadata != NULL) { 211 memcpy(ctx->metadata, metadata, ctx->metadata_len); 212 } 213 } 214 215 if ((unsigned int)cmd != NVME_IOCTL_ADMIN_CMD) { 216 /* Send NS for IO IOCTLs */ 217 rv = nvme_io_msg_send(cuse_device->ctrlr, passthru_cmd->nsid, cuse_nvme_passthru_cmd_execute, ctx); 218 } else { 219 /* NS == 0 for Admin IOCTLs */ 220 rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_passthru_cmd_execute, ctx); 221 } 222 if (rv) { 223 SPDK_ERRLOG("Cannot send io msg to the controller\n"); 224 fuse_reply_err(req, -rv); 225 cuse_io_ctx_free(ctx); 226 return; 227 } 228 } 229 230 static void 231 cuse_nvme_passthru_cmd(fuse_req_t req, int cmd, void *arg, 232 struct fuse_file_info *fi, unsigned flags, 233 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 234 { 235 struct nvme_passthru_cmd *passthru_cmd; 236 struct iovec in_iov[3], out_iov[3]; 237 int in_iovcnt = 0, out_iovcnt = 0; 238 const void *dptr = NULL, *mdptr = NULL; 239 enum spdk_nvme_data_transfer data_transfer; 240 241 in_iov[in_iovcnt].iov_base = (void *)arg; 242 in_iov[in_iovcnt].iov_len = sizeof(*passthru_cmd); 243 in_iovcnt += 1; 244 if (in_bufsz == 0) { 245 fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt); 246 return; 247 } 248 249 passthru_cmd = (struct nvme_passthru_cmd *)in_buf; 250 data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode); 251 252 if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 253 /* Make data pointer accessible (RO) */ 254 if (passthru_cmd->addr != 0) { 255 in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->addr; 256 in_iov[in_iovcnt].iov_len = passthru_cmd->data_len; 257 in_iovcnt += 1; 258 } 259 /* Make metadata pointer accessible (RO) */ 260 if (passthru_cmd->metadata != 0) { 261 in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->metadata; 262 in_iov[in_iovcnt].iov_len = passthru_cmd->metadata_len; 263 in_iovcnt += 1; 264 } 265 } 266 267 if 
(!fuse_check_req_size(req, in_iov, in_iovcnt)) { 268 return; 269 } 270 /* Always make result field writeable regardless of data transfer bits */ 271 out_iov[out_iovcnt].iov_base = &((struct nvme_passthru_cmd *)arg)->result; 272 out_iov[out_iovcnt].iov_len = sizeof(uint32_t); 273 out_iovcnt += 1; 274 275 if (data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 276 /* Make data pointer accessible (WO) */ 277 if (passthru_cmd->data_len > 0) { 278 out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->addr; 279 out_iov[out_iovcnt].iov_len = passthru_cmd->data_len; 280 out_iovcnt += 1; 281 } 282 /* Make metadata pointer accessible (WO) */ 283 if (passthru_cmd->metadata_len > 0) { 284 out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->metadata; 285 out_iov[out_iovcnt].iov_len = passthru_cmd->metadata_len; 286 out_iovcnt += 1; 287 } 288 } 289 290 if (!fuse_check_req_size(req, out_iov, out_iovcnt)) { 291 return; 292 } 293 294 if (out_bufsz == 0) { 295 fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt); 296 return; 297 } 298 299 if (data_transfer == SPDK_NVME_DATA_BIDIRECTIONAL) { 300 fuse_reply_err(req, EINVAL); 301 return; 302 } 303 304 if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 305 dptr = (passthru_cmd->addr == 0) ? NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd); 306 mdptr = (passthru_cmd->metadata == 0) ? 
NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd) + 307 passthru_cmd->data_len; 308 } 309 310 cuse_nvme_passthru_cmd_send(req, passthru_cmd, dptr, mdptr, cmd); 311 } 312 313 static void 314 cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 315 { 316 int rc; 317 fuse_req_t req = arg; 318 319 rc = spdk_nvme_ctrlr_reset(ctrlr); 320 if (rc) { 321 fuse_reply_err(req, rc); 322 return; 323 } 324 325 fuse_reply_ioctl_iov(req, 0, NULL, 0); 326 } 327 328 static void 329 cuse_nvme_subsys_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 330 { 331 int rc; 332 fuse_req_t req = arg; 333 334 rc = spdk_nvme_ctrlr_reset_subsystem(ctrlr); 335 if (rc) { 336 fuse_reply_err(req, rc); 337 return; 338 } 339 340 fuse_reply_ioctl_iov(req, 0, NULL, 0); 341 } 342 343 static void 344 cuse_nvme_reset(fuse_req_t req, int cmd, void *arg, 345 struct fuse_file_info *fi, unsigned flags, 346 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 347 { 348 int rv; 349 struct cuse_device *cuse_device = fuse_req_userdata(req); 350 351 if (cuse_device->nsid) { 352 SPDK_ERRLOG("Namespace reset not supported\n"); 353 fuse_reply_err(req, EINVAL); 354 return; 355 } 356 357 if (cmd == NVME_IOCTL_SUBSYS_RESET) { 358 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBSYS_RESET\n"); 359 rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_subsys_reset_execute, 360 (void *)req); 361 } else { 362 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESET\n"); 363 rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req); 364 } 365 if (rv) { 366 SPDK_ERRLOG("Cannot send reset\n"); 367 fuse_reply_err(req, EINVAL); 368 } 369 } 370 371 static void 372 cuse_nvme_rescan_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 373 { 374 fuse_req_t req = arg; 375 376 nvme_ctrlr_update_namespaces(ctrlr); 377 fuse_reply_ioctl_iov(req, 0, NULL, 0); 378 } 379 380 static void 381 cuse_nvme_rescan(fuse_req_t req, int cmd, void *arg, 382 
struct fuse_file_info *fi, unsigned flags, 383 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 384 { 385 int rv; 386 struct cuse_device *cuse_device = fuse_req_userdata(req); 387 388 if (cuse_device->nsid) { 389 SPDK_ERRLOG("Namespace rescan not supported\n"); 390 fuse_reply_err(req, EINVAL); 391 return; 392 } 393 394 rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_rescan_execute, (void *)req); 395 if (rv) { 396 SPDK_ERRLOG("Cannot send rescan\n"); 397 fuse_reply_err(req, EINVAL); 398 } 399 } 400 401 /***************************************************************************** 402 * Namespace IO requests 403 */ 404 405 static void 406 cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl) 407 { 408 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 409 uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */ 410 411 fuse_reply_ioctl_iov(ctx->req, status_field, NULL, 0); 412 413 cuse_io_ctx_free(ctx); 414 } 415 416 static void 417 cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 418 { 419 int rc; 420 struct cuse_io_ctx *ctx = arg; 421 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 422 423 rc = spdk_nvme_ns_cmd_write_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata, 424 ctx->lba, /* LBA start */ 425 ctx->lba_count, /* number of LBAs */ 426 cuse_nvme_submit_io_write_done, ctx, 0, 427 ctx->appmask, ctx->apptag); 428 429 if (rc != 0) { 430 SPDK_ERRLOG("write failed: rc = %d\n", rc); 431 fuse_reply_err(ctx->req, rc); 432 cuse_io_ctx_free(ctx); 433 return; 434 } 435 } 436 437 static void 438 cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg, 439 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size, 440 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 441 { 442 const struct nvme_user_io *user_io = in_buf; 443 struct cuse_io_ctx *ctx; 444 int rc; 445 446 ctx = 
(struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 447 if (!ctx) { 448 SPDK_ERRLOG("Cannot allocate memory for context\n"); 449 fuse_reply_err(req, ENOMEM); 450 return; 451 } 452 453 ctx->req = req; 454 ctx->lba = user_io->slba; 455 ctx->lba_count = user_io->nblocks + 1; 456 ctx->data_len = ctx->lba_count * block_size; 457 458 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY, 459 SPDK_MALLOC_DMA); 460 if (ctx->data == NULL) { 461 SPDK_ERRLOG("Write buffer allocation failed\n"); 462 fuse_reply_err(ctx->req, ENOMEM); 463 free(ctx); 464 return; 465 } 466 467 memcpy(ctx->data, (uint8_t *)in_buf + sizeof(*user_io), ctx->data_len); 468 469 if (user_io->metadata) { 470 ctx->apptag = user_io->apptag; 471 ctx->appmask = user_io->appmask; 472 ctx->metadata_len = md_size * ctx->lba_count; 473 ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 474 475 if (ctx->metadata == NULL) { 476 SPDK_ERRLOG("Cannot allocate memory for metadata\n"); 477 if (ctx->metadata_len == 0) { 478 SPDK_ERRLOG("Device format does not support metadata\n"); 479 } 480 fuse_reply_err(req, ENOMEM); 481 cuse_io_ctx_free(ctx); 482 return; 483 } 484 485 memcpy(ctx->metadata, (uint8_t *)in_buf + sizeof(*user_io) + ctx->data_len, 486 ctx->metadata_len); 487 } 488 489 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb, 490 ctx); 491 if (rc < 0) { 492 SPDK_ERRLOG("Cannot send write io\n"); 493 fuse_reply_err(ctx->req, rc); 494 cuse_io_ctx_free(ctx); 495 } 496 } 497 498 static void 499 cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl) 500 { 501 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 502 struct iovec iov[2]; 503 int iovcnt = 0; 504 uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */ 505 506 iov[iovcnt].iov_base = ctx->data; 507 iov[iovcnt].iov_len = ctx->data_len; 508 iovcnt += 1; 509 510 if (ctx->metadata) { 511 iov[iovcnt].iov_base = 
ctx->metadata; 512 iov[iovcnt].iov_len = ctx->metadata_len; 513 iovcnt += 1; 514 } 515 516 fuse_reply_ioctl_iov(ctx->req, status_field, iov, iovcnt); 517 518 cuse_io_ctx_free(ctx); 519 } 520 521 static void 522 cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 523 { 524 int rc; 525 struct cuse_io_ctx *ctx = arg; 526 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 527 528 rc = spdk_nvme_ns_cmd_read_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata, 529 ctx->lba, /* LBA start */ 530 ctx->lba_count, /* number of LBAs */ 531 cuse_nvme_submit_io_read_done, ctx, 0, 532 ctx->appmask, ctx->apptag); 533 534 if (rc != 0) { 535 SPDK_ERRLOG("read failed: rc = %d\n", rc); 536 fuse_reply_err(ctx->req, rc); 537 cuse_io_ctx_free(ctx); 538 return; 539 } 540 } 541 542 static void 543 cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg, 544 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size, 545 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 546 { 547 int rc; 548 struct cuse_io_ctx *ctx; 549 const struct nvme_user_io *user_io = in_buf; 550 551 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 552 if (!ctx) { 553 SPDK_ERRLOG("Cannot allocate memory for context\n"); 554 fuse_reply_err(req, ENOMEM); 555 return; 556 } 557 558 ctx->req = req; 559 ctx->lba = user_io->slba; 560 ctx->lba_count = user_io->nblocks + 1; 561 562 ctx->data_len = ctx->lba_count * block_size; 563 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY, 564 SPDK_MALLOC_DMA); 565 if (ctx->data == NULL) { 566 SPDK_ERRLOG("Read buffer allocation failed\n"); 567 fuse_reply_err(ctx->req, ENOMEM); 568 free(ctx); 569 return; 570 } 571 572 if (user_io->metadata) { 573 ctx->apptag = user_io->apptag; 574 ctx->appmask = user_io->appmask; 575 ctx->metadata_len = md_size * ctx->lba_count; 576 ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, 
NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 577 578 if (ctx->metadata == NULL) { 579 SPDK_ERRLOG("Cannot allocate memory for metadata\n"); 580 if (ctx->metadata_len == 0) { 581 SPDK_ERRLOG("Device format does not support metadata\n"); 582 } 583 fuse_reply_err(req, ENOMEM); 584 cuse_io_ctx_free(ctx); 585 return; 586 } 587 } 588 589 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx); 590 if (rc < 0) { 591 SPDK_ERRLOG("Cannot send read io\n"); 592 fuse_reply_err(ctx->req, rc); 593 cuse_io_ctx_free(ctx); 594 } 595 } 596 597 598 static void 599 cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg, 600 struct fuse_file_info *fi, unsigned flags, 601 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 602 { 603 const struct nvme_user_io *user_io; 604 struct iovec in_iov[3], out_iov[2]; 605 int in_iovcnt = 0, out_iovcnt = 0; 606 struct cuse_device *cuse_device = fuse_req_userdata(req); 607 struct spdk_nvme_ns *ns; 608 uint32_t block_size; 609 uint32_t md_size; 610 611 in_iov[in_iovcnt].iov_base = (void *)arg; 612 in_iov[in_iovcnt].iov_len = sizeof(*user_io); 613 in_iovcnt += 1; 614 if (in_bufsz == 0) { 615 fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, 0); 616 return; 617 } 618 619 user_io = in_buf; 620 621 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 622 block_size = spdk_nvme_ns_get_sector_size(ns); 623 md_size = spdk_nvme_ns_get_md_size(ns); 624 625 switch (user_io->opcode) { 626 case SPDK_NVME_OPC_READ: 627 out_iov[out_iovcnt].iov_base = (void *)user_io->addr; 628 out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * block_size; 629 out_iovcnt += 1; 630 if (user_io->metadata != 0) { 631 out_iov[out_iovcnt].iov_base = (void *)user_io->metadata; 632 out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * md_size; 633 out_iovcnt += 1; 634 } 635 if (!fuse_check_req_size(req, out_iov, out_iovcnt)) { 636 return; 637 } 638 if (out_bufsz == 0) { 639 fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, 
out_iov, out_iovcnt); 640 return; 641 } 642 643 cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags, 644 block_size, md_size, in_buf, in_bufsz, out_bufsz); 645 break; 646 case SPDK_NVME_OPC_WRITE: 647 in_iov[in_iovcnt].iov_base = (void *)user_io->addr; 648 in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * block_size; 649 in_iovcnt += 1; 650 if (user_io->metadata != 0) { 651 in_iov[in_iovcnt].iov_base = (void *)user_io->metadata; 652 in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * md_size; 653 in_iovcnt += 1; 654 } 655 if (!fuse_check_req_size(req, in_iov, in_iovcnt)) { 656 return; 657 } 658 if (in_bufsz == sizeof(*user_io)) { 659 fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt); 660 return; 661 } 662 663 cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags, 664 block_size, md_size, in_buf, in_bufsz, out_bufsz); 665 break; 666 default: 667 SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode); 668 fuse_reply_err(req, EINVAL); 669 return; 670 } 671 672 } 673 674 /***************************************************************************** 675 * Other namespace IOCTLs 676 */ 677 static void 678 cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg, 679 struct fuse_file_info *fi, unsigned flags, 680 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 681 { 682 uint64_t size; 683 struct spdk_nvme_ns *ns; 684 struct cuse_device *cuse_device = fuse_req_userdata(req); 685 686 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 687 688 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 689 size = spdk_nvme_ns_get_num_sectors(ns); 690 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 691 } 692 693 static void 694 cuse_blkpbszget(fuse_req_t req, int cmd, void *arg, 695 struct fuse_file_info *fi, unsigned flags, 696 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 697 { 698 int pbsz; 699 struct spdk_nvme_ns *ns; 700 struct cuse_device *cuse_device = fuse_req_userdata(req); 701 702 
FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz); 703 704 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 705 pbsz = spdk_nvme_ns_get_sector_size(ns); 706 fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz)); 707 } 708 709 static void 710 cuse_blkgetsize(fuse_req_t req, int cmd, void *arg, 711 struct fuse_file_info *fi, unsigned flags, 712 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 713 { 714 long size; 715 struct spdk_nvme_ns *ns; 716 struct cuse_device *cuse_device = fuse_req_userdata(req); 717 718 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 719 720 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 721 722 /* return size in 512 bytes blocks */ 723 size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns); 724 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 725 } 726 727 static void 728 cuse_blkgetsectorsize(fuse_req_t req, int cmd, void *arg, 729 struct fuse_file_info *fi, unsigned flags, 730 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 731 { 732 int ssize; 733 struct spdk_nvme_ns *ns; 734 struct cuse_device *cuse_device = fuse_req_userdata(req); 735 736 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, ssize); 737 738 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 739 ssize = spdk_nvme_ns_get_sector_size(ns); 740 fuse_reply_ioctl(req, 0, &ssize, sizeof(ssize)); 741 } 742 743 static void 744 cuse_getid(fuse_req_t req, int cmd, void *arg, 745 struct fuse_file_info *fi, unsigned flags, 746 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 747 { 748 struct cuse_device *cuse_device = fuse_req_userdata(req); 749 750 fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0); 751 } 752 753 struct cuse_transport { 754 char trstring[SPDK_NVMF_TRSTRING_MAX_LEN + 1]; 755 char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; 756 }; 757 758 #define SPDK_CUSE_GET_TRANSPORT _IOWR('n', 0x1, struct cuse_transport) 759 760 static void 761 cuse_get_transport(fuse_req_t req, int cmd, void *arg, 762 
struct fuse_file_info *fi, unsigned flags, 763 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 764 { 765 struct cuse_device *cuse_device = fuse_req_userdata(req); 766 struct cuse_transport tr = {}; 767 768 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, tr); 769 770 memcpy(tr.trstring, cuse_device->ctrlr->trid.trstring, SPDK_NVMF_TRSTRING_MAX_LEN + 1); 771 memcpy(tr.traddr, cuse_device->ctrlr->trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1); 772 773 fuse_reply_ioctl(req, 0, &tr, sizeof(tr)); 774 } 775 776 static void 777 cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg, 778 struct fuse_file_info *fi, unsigned flags, 779 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 780 { 781 if (flags & FUSE_IOCTL_COMPAT) { 782 fuse_reply_err(req, ENOSYS); 783 return; 784 } 785 786 switch ((unsigned int)cmd) { 787 case NVME_IOCTL_ADMIN_CMD: 788 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n"); 789 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 790 break; 791 792 case NVME_IOCTL_RESET: 793 case NVME_IOCTL_SUBSYS_RESET: 794 cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 795 break; 796 797 case NVME_IOCTL_RESCAN: 798 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESCAN\n"); 799 cuse_nvme_rescan(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 800 break; 801 802 case NVME_IOCTL_ID: 803 /* Return error but don't ERRLOG - nvme-cli will frequently send this 804 * IOCTL to controller devices. 
805 */ 806 fuse_reply_err(req, ENOTTY); 807 break; 808 809 case SPDK_CUSE_GET_TRANSPORT: 810 SPDK_DEBUGLOG(nvme_cuse, "SPDK_CUSE_GET_TRANSPORT\n"); 811 cuse_get_transport(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 812 break; 813 814 default: 815 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 816 fuse_reply_err(req, ENOTTY); 817 } 818 } 819 820 static void 821 cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg, 822 struct fuse_file_info *fi, unsigned flags, 823 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 824 { 825 if (flags & FUSE_IOCTL_COMPAT) { 826 fuse_reply_err(req, ENOSYS); 827 return; 828 } 829 830 switch ((unsigned int)cmd) { 831 case NVME_IOCTL_ADMIN_CMD: 832 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n"); 833 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 834 break; 835 836 case NVME_IOCTL_SUBMIT_IO: 837 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBMIT_IO\n"); 838 cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 839 break; 840 841 case NVME_IOCTL_IO_CMD: 842 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_IO_CMD\n"); 843 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 844 break; 845 846 case NVME_IOCTL_ID: 847 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ID\n"); 848 cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 849 break; 850 851 case BLKPBSZGET: 852 SPDK_DEBUGLOG(nvme_cuse, "BLKPBSZGET\n"); 853 cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 854 break; 855 856 case BLKSSZGET: 857 SPDK_DEBUGLOG(nvme_cuse, "BLKSSZGET\n"); 858 cuse_blkgetsectorsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 859 break; 860 861 case BLKGETSIZE: 862 SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE\n"); 863 /* Returns the device size as a number of 512-byte blocks (returns pointer to long) */ 864 cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 865 break; 866 867 case BLKGETSIZE64: 868 SPDK_DEBUGLOG(nvme_cuse, 
"BLKGETSIZE64\n"); 869 /* Returns the device size in sectors (returns pointer to uint64_t) */ 870 cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 871 break; 872 873 default: 874 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 875 fuse_reply_err(req, ENOTTY); 876 } 877 } 878 879 /***************************************************************************** 880 * CUSE threads initialization. 881 */ 882 883 static void 884 cuse_open(fuse_req_t req, struct fuse_file_info *fi) 885 { 886 fuse_reply_open(req, fi); 887 } 888 889 static const struct cuse_lowlevel_ops cuse_ctrlr_clop = { 890 .open = cuse_open, 891 .ioctl = cuse_ctrlr_ioctl, 892 }; 893 894 static const struct cuse_lowlevel_ops cuse_ns_clop = { 895 .open = cuse_open, 896 .ioctl = cuse_ns_ioctl, 897 }; 898 899 static int 900 cuse_session_create(struct cuse_device *cuse_device) 901 { 902 char *cuse_argv[] = { "cuse", "-f" }; 903 int multithreaded; 904 int cuse_argc = SPDK_COUNTOF(cuse_argv); 905 struct cuse_info ci; 906 char devname_arg[128 + 8]; 907 const char *dev_info_argv[] = { devname_arg }; 908 909 snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name); 910 911 memset(&ci, 0, sizeof(ci)); 912 ci.dev_info_argc = 1; 913 ci.dev_info_argv = dev_info_argv; 914 ci.flags = CUSE_UNRESTRICTED_IOCTL; 915 916 if (cuse_device->nsid) { 917 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop, 918 &multithreaded, cuse_device); 919 } else { 920 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop, 921 &multithreaded, cuse_device); 922 } 923 924 if (!cuse_device->session) { 925 SPDK_ERRLOG("Cannot create cuse session\n"); 926 return -1; 927 } 928 SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name); 929 cuse_device->fuse_efd = fuse_session_fd(cuse_device->session); 930 931 pthread_mutex_lock(&g_pending_device_mtx); 932 TAILQ_INSERT_TAIL(&g_pending_device_head, cuse_device, 
cuse_thread_tailq); 933 if (eventfd_write(g_cuse_thread_msg_fd, 1) != 0) { 934 TAILQ_REMOVE(&g_pending_device_head, cuse_device, cuse_thread_tailq); 935 pthread_mutex_unlock(&g_pending_device_mtx); 936 SPDK_ERRLOG("eventfd_write failed: (%s).\n", spdk_strerror(errno)); 937 return -errno; 938 } 939 pthread_mutex_unlock(&g_pending_device_mtx); 940 return 0; 941 } 942 943 static int 944 process_cuse_event(void *arg) 945 { 946 struct fuse_session *session = arg; 947 struct fuse_buf buf = { .mem = NULL }; 948 int rc = fuse_session_receive_buf(session, &buf); 949 950 if (rc > 0) { 951 fuse_session_process_buf(session, &buf); 952 } 953 free(buf.mem); 954 return 0; 955 } 956 957 static int 958 cuse_thread_add_session(void *arg) 959 { 960 struct cuse_device *cuse_device, *tmp; 961 int ret; 962 eventfd_t val; 963 964 eventfd_read(g_cuse_thread_msg_fd, &val); 965 966 pthread_mutex_lock(&g_pending_device_mtx); 967 TAILQ_FOREACH_SAFE(cuse_device, &g_pending_device_head, cuse_thread_tailq, tmp) { 968 ret = spdk_fd_group_add(g_device_fdgrp, cuse_device->fuse_efd, process_cuse_event, 969 cuse_device->session, cuse_device->dev_name); 970 if (ret < 0) { 971 SPDK_ERRLOG("Failed to add fd %d: (%s).\n", cuse_device->fuse_efd, 972 spdk_strerror(-ret)); 973 TAILQ_REMOVE(&g_pending_device_head, cuse_device, cuse_thread_tailq); 974 free(cuse_device); 975 assert(false); 976 } 977 } 978 TAILQ_CONCAT(&g_active_device_head, &g_pending_device_head, cuse_thread_tailq); 979 pthread_mutex_unlock(&g_pending_device_mtx); 980 return 0; 981 } 982 983 static void * 984 cuse_thread(void *unused) 985 { 986 struct cuse_device *cuse_device, *tmp; 987 int timeout_msecs = 500; 988 bool retry; 989 990 spdk_unaffinitize_thread(); 991 992 do { 993 retry = false; 994 spdk_fd_group_wait(g_device_fdgrp, timeout_msecs); 995 while (!TAILQ_EMPTY(&g_active_device_head)) { 996 TAILQ_FOREACH_SAFE(cuse_device, &g_active_device_head, cuse_thread_tailq, tmp) { 997 if (fuse_session_exited(cuse_device->session)) { 998 
spdk_fd_group_remove(g_device_fdgrp, cuse_device->fuse_efd); 999 fuse_session_reset(cuse_device->session); 1000 TAILQ_REMOVE(&g_active_device_head, cuse_device, cuse_thread_tailq); 1001 if (cuse_device->force_exit) { 1002 cuse_lowlevel_teardown(cuse_device->session); 1003 free(cuse_device); 1004 } 1005 } 1006 } 1007 /* Receive and process fuse event and new cuse device addition requests. */ 1008 spdk_fd_group_wait(g_device_fdgrp, timeout_msecs); 1009 } 1010 pthread_mutex_lock(&g_cuse_mtx); 1011 if (!TAILQ_EMPTY(&g_pending_device_head)) { 1012 pthread_mutex_unlock(&g_cuse_mtx); 1013 /* Retry as we have some cuse devices pending to be polled on. */ 1014 retry = true; 1015 } 1016 } while (retry); 1017 1018 spdk_fd_group_remove(g_device_fdgrp, g_cuse_thread_msg_fd); 1019 close(g_cuse_thread_msg_fd); 1020 spdk_fd_group_destroy(g_device_fdgrp); 1021 g_device_fdgrp = NULL; 1022 pthread_mutex_unlock(&g_cuse_mtx); 1023 SPDK_NOTICELOG("Cuse thread exited.\n"); 1024 return NULL; 1025 } 1026 1027 static struct cuse_device *nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, 1028 uint32_t nsid); 1029 1030 /***************************************************************************** 1031 * CUSE devices management 1032 */ 1033 1034 static int 1035 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid) 1036 { 1037 struct cuse_device *ns_device = NULL; 1038 int rv; 1039 1040 ns_device = nvme_cuse_get_cuse_ns_device(ctrlr_device->ctrlr, nsid); 1041 if (ns_device != NULL) { 1042 return 0; 1043 } 1044 1045 ns_device = calloc(1, sizeof(struct cuse_device)); 1046 if (ns_device == NULL) { 1047 return -ENOMEM; 1048 } 1049 1050 ns_device->ctrlr = ctrlr_device->ctrlr; 1051 ns_device->ctrlr_device = ctrlr_device; 1052 ns_device->nsid = nsid; 1053 rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d", 1054 ctrlr_device->dev_name, ns_device->nsid); 1055 if (rv < 0) { 1056 SPDK_ERRLOG("Device name too long.\n"); 1057 rv = -ENAMETOOLONG; 1058 goto 
free_device; 1059 } 1060 1061 rv = cuse_session_create(ns_device); 1062 if (rv != 0) { 1063 goto free_device; 1064 } 1065 1066 TAILQ_INSERT_TAIL(&ctrlr_device->ns_devices, ns_device, tailq); 1067 1068 return 0; 1069 1070 free_device: 1071 free(ns_device); 1072 return rv; 1073 } 1074 1075 static void 1076 cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, struct cuse_device *ns_device) 1077 { 1078 TAILQ_REMOVE(&ctrlr_device->ns_devices, ns_device, tailq); 1079 /* ns_device will be freed by cuse_thread */ 1080 if (ns_device->session != NULL) { 1081 ns_device->force_exit = true; 1082 fuse_session_exit(ns_device->session); 1083 } 1084 } 1085 1086 static int 1087 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index) 1088 { 1089 int dev_fd; 1090 int pid; 1091 void *dev_map; 1092 struct flock cusedev_lock = { 1093 .l_type = F_WRLCK, 1094 .l_whence = SEEK_SET, 1095 .l_start = 0, 1096 .l_len = 0, 1097 }; 1098 1099 snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name), 1100 "/var/tmp/spdk_nvme_cuse_lock_%" PRIu32, index); 1101 1102 dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 1103 if (dev_fd == -1) { 1104 SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name); 1105 return -errno; 1106 } 1107 1108 if (ftruncate(dev_fd, sizeof(int)) != 0) { 1109 SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name); 1110 close(dev_fd); 1111 return -errno; 1112 } 1113 1114 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 1115 MAP_SHARED, dev_fd, 0); 1116 if (dev_map == MAP_FAILED) { 1117 SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno); 1118 close(dev_fd); 1119 return -errno; 1120 } 1121 1122 if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) { 1123 pid = *(int *)dev_map; 1124 SPDK_ERRLOG("Cannot create lock on device %s, probably" 1125 " process %d has claimed it\n", ctrlr_device->lock_name, pid); 1126 munmap(dev_map, sizeof(int)); 1127 close(dev_fd); 1128 /* F_SETLK returns unspecified 
errnos, normalize them */ 1129 return -EACCES; 1130 } 1131 1132 *(int *)dev_map = (int)getpid(); 1133 munmap(dev_map, sizeof(int)); 1134 ctrlr_device->claim_fd = dev_fd; 1135 ctrlr_device->index = index; 1136 /* Keep dev_fd open to maintain the lock. */ 1137 return 0; 1138 } 1139 1140 static void 1141 nvme_cuse_unclaim(struct cuse_device *ctrlr_device) 1142 { 1143 close(ctrlr_device->claim_fd); 1144 ctrlr_device->claim_fd = -1; 1145 unlink(ctrlr_device->lock_name); 1146 } 1147 1148 static void 1149 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device) 1150 { 1151 struct cuse_device *ns_device, *tmp; 1152 1153 TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) { 1154 cuse_nvme_ns_stop(ctrlr_device, ns_device); 1155 } 1156 1157 assert(TAILQ_EMPTY(&ctrlr_device->ns_devices)); 1158 1159 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 1160 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 1161 spdk_bit_array_free(&g_ctrlr_started); 1162 } 1163 nvme_cuse_unclaim(ctrlr_device); 1164 1165 TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq); 1166 /* ctrlr_device will be freed by cuse_thread */ 1167 ctrlr_device->force_exit = true; 1168 fuse_session_exit(ctrlr_device->session); 1169 } 1170 1171 static int 1172 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device) 1173 { 1174 struct cuse_device *ns_device, *tmp; 1175 uint32_t nsid; 1176 1177 /* Remove namespaces that have disappeared */ 1178 TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) { 1179 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, ns_device->nsid)) { 1180 cuse_nvme_ns_stop(ctrlr_device, ns_device); 1181 } 1182 } 1183 1184 /* Add new namespaces */ 1185 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr_device->ctrlr); 1186 while (nsid != 0) { 1187 if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) { 1188 SPDK_ERRLOG("Cannot start CUSE namespace device."); 1189 return -1; 1190 } 1191 1192 nsid = 
spdk_nvme_ctrlr_get_next_active_ns(ctrlr_device->ctrlr, nsid); 1193 } 1194 1195 return 0; 1196 } 1197 1198 #ifdef FUSE_LOG_H_ 1199 static void 1200 nvme_fuse_log_func(enum fuse_log_level level, const char *fmt, va_list ap) 1201 { 1202 /* fuse will unnecessarily print this log message when tearing down 1203 * sessions, once for every session after the first. So use this custom 1204 * log handler to silence that specific log message. 1205 */ 1206 if (strstr(fmt, "fuse_remove_signal_handlers: unknown session") != NULL) { 1207 return; 1208 } 1209 1210 vfprintf(stderr, fmt, ap); 1211 } 1212 #endif 1213 1214 static int 1215 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr) 1216 { 1217 int rv = 0; 1218 struct cuse_device *ctrlr_device; 1219 1220 SPDK_NOTICELOG("Creating cuse device for controller\n"); 1221 1222 if (g_ctrlr_started == NULL) { 1223 g_ctrlr_started = spdk_bit_array_create(128); 1224 if (g_ctrlr_started == NULL) { 1225 SPDK_ERRLOG("Cannot create bit array\n"); 1226 return -ENOMEM; 1227 } 1228 #ifdef FUSE_LOG_H_ 1229 /* Older versions of libfuse don't have fuse_set_log_func nor 1230 * fuse_log.h, so this is the easiest way to check for it 1231 * without adding a separate CONFIG flag. 
1232 */ 1233 fuse_set_log_func(nvme_fuse_log_func); 1234 #endif 1235 } 1236 1237 ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device)); 1238 if (!ctrlr_device) { 1239 SPDK_ERRLOG("Cannot allocate memory for ctrlr_device."); 1240 rv = -ENOMEM; 1241 goto free_device; 1242 } 1243 1244 ctrlr_device->ctrlr = ctrlr; 1245 1246 /* Check if device already exists, if not increment index until success */ 1247 ctrlr_device->index = 0; 1248 while (1) { 1249 ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index); 1250 if (ctrlr_device->index == UINT32_MAX) { 1251 SPDK_ERRLOG("Too many registered controllers\n"); 1252 goto free_device; 1253 } 1254 1255 if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) { 1256 break; 1257 } 1258 ctrlr_device->index++; 1259 } 1260 spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index); 1261 snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d", 1262 ctrlr_device->index); 1263 1264 rv = cuse_session_create(ctrlr_device); 1265 if (rv != 0) { 1266 goto clear_and_free; 1267 } 1268 1269 TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq); 1270 1271 TAILQ_INIT(&ctrlr_device->ns_devices); 1272 1273 /* Start all active namespaces */ 1274 if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) { 1275 SPDK_ERRLOG("Cannot start CUSE namespace devices."); 1276 cuse_nvme_ctrlr_stop(ctrlr_device); 1277 return -1; 1278 } 1279 1280 return 0; 1281 1282 clear_and_free: 1283 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 1284 free_device: 1285 free(ctrlr_device); 1286 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 1287 spdk_bit_array_free(&g_ctrlr_started); 1288 } 1289 return rv; 1290 } 1291 1292 static struct cuse_device * 1293 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr) 1294 { 1295 struct cuse_device *ctrlr_device = NULL; 1296 1297 TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) { 1298 if (ctrlr_device->ctrlr == ctrlr) { 
1299 break; 1300 } 1301 } 1302 1303 return ctrlr_device; 1304 } 1305 1306 static struct cuse_device * 1307 nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 1308 { 1309 struct cuse_device *ctrlr_device = NULL; 1310 struct cuse_device *ns_device; 1311 1312 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1313 if (!ctrlr_device) { 1314 return NULL; 1315 } 1316 1317 TAILQ_FOREACH(ns_device, &ctrlr_device->ns_devices, tailq) { 1318 if (ns_device->nsid == nsid) { 1319 return ns_device; 1320 } 1321 } 1322 1323 return NULL; 1324 } 1325 1326 static void 1327 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr) 1328 { 1329 struct cuse_device *ctrlr_device; 1330 1331 assert(spdk_process_is_primary()); 1332 1333 pthread_mutex_lock(&g_cuse_mtx); 1334 1335 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1336 if (!ctrlr_device) { 1337 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1338 pthread_mutex_unlock(&g_cuse_mtx); 1339 return; 1340 } 1341 1342 cuse_nvme_ctrlr_stop(ctrlr_device); 1343 1344 pthread_mutex_unlock(&g_cuse_mtx); 1345 } 1346 1347 static void 1348 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr) 1349 { 1350 struct cuse_device *ctrlr_device; 1351 1352 assert(spdk_process_is_primary()); 1353 1354 pthread_mutex_lock(&g_cuse_mtx); 1355 1356 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1357 if (!ctrlr_device) { 1358 pthread_mutex_unlock(&g_cuse_mtx); 1359 return; 1360 } 1361 1362 cuse_nvme_ctrlr_update_namespaces(ctrlr_device); 1363 1364 pthread_mutex_unlock(&g_cuse_mtx); 1365 } 1366 1367 static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = { 1368 .name = "cuse", 1369 .stop = nvme_cuse_stop, 1370 .update = nvme_cuse_update, 1371 }; 1372 1373 static int 1374 start_cuse_thread(void) 1375 { 1376 int rc = 0; 1377 pthread_t tid; 1378 1379 rc = spdk_fd_group_create(&g_device_fdgrp); 1380 if (rc < 0) { 1381 SPDK_ERRLOG("Failed to create fd group: (%s).\n", spdk_strerror(-rc)); 1382 return rc; 1383 } 1384 1385 
g_cuse_thread_msg_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 1386 if (g_cuse_thread_msg_fd < 0) { 1387 SPDK_ERRLOG("Failed to create eventfd: (%s).\n", spdk_strerror(errno)); 1388 rc = -errno; 1389 goto destroy_fd_group; 1390 } 1391 1392 rc = SPDK_FD_GROUP_ADD(g_device_fdgrp, g_cuse_thread_msg_fd, 1393 cuse_thread_add_session, NULL); 1394 if (rc < 0) { 1395 SPDK_ERRLOG("Failed to add fd %d: %s.\n", g_cuse_thread_msg_fd, 1396 spdk_strerror(-rc)); 1397 goto close_and_destroy_fd; 1398 } 1399 1400 rc = pthread_create(&tid, NULL, cuse_thread, NULL); 1401 if (rc != 0) { 1402 SPDK_ERRLOG("pthread_create failed\n"); 1403 rc = -rc; 1404 goto remove_close_and_destroy_fd; 1405 } 1406 pthread_detach(tid); 1407 pthread_setname_np(tid, "cuse_thread"); 1408 SPDK_NOTICELOG("Successfully started cuse thread to poll for admin commands\n"); 1409 return rc; 1410 1411 remove_close_and_destroy_fd: 1412 spdk_fd_group_remove(g_device_fdgrp, g_cuse_thread_msg_fd); 1413 close_and_destroy_fd: 1414 close(g_cuse_thread_msg_fd); 1415 destroy_fd_group: 1416 spdk_fd_group_destroy(g_device_fdgrp); 1417 g_device_fdgrp = NULL; 1418 return rc; 1419 } 1420 1421 int 1422 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr) 1423 { 1424 int rc; 1425 1426 if (!spdk_process_is_primary()) { 1427 SPDK_ERRLOG("only allowed from primary process\n"); 1428 return -EINVAL; 1429 } 1430 1431 rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer); 1432 if (rc) { 1433 return rc; 1434 } 1435 1436 pthread_mutex_lock(&g_cuse_mtx); 1437 1438 if (g_device_fdgrp == NULL) { 1439 rc = start_cuse_thread(); 1440 if (rc < 0) { 1441 SPDK_ERRLOG("Failed to start cuse thread to poll for admin commands\n"); 1442 pthread_mutex_unlock(&g_cuse_mtx); 1443 return rc; 1444 } 1445 } 1446 1447 rc = nvme_cuse_start(ctrlr); 1448 if (rc) { 1449 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1450 } 1451 1452 pthread_mutex_unlock(&g_cuse_mtx); 1453 1454 return rc; 1455 } 1456 1457 int 1458 
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr) 1459 { 1460 struct cuse_device *ctrlr_device; 1461 1462 if (!spdk_process_is_primary()) { 1463 SPDK_ERRLOG("only allowed from primary process\n"); 1464 return -EINVAL; 1465 } 1466 1467 pthread_mutex_lock(&g_cuse_mtx); 1468 1469 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1470 if (!ctrlr_device) { 1471 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1472 pthread_mutex_unlock(&g_cuse_mtx); 1473 return -ENODEV; 1474 } 1475 1476 cuse_nvme_ctrlr_stop(ctrlr_device); 1477 1478 pthread_mutex_unlock(&g_cuse_mtx); 1479 1480 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1481 1482 return 0; 1483 } 1484 1485 void 1486 spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1487 { 1488 nvme_cuse_update(ctrlr); 1489 } 1490 1491 int 1492 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size) 1493 { 1494 struct cuse_device *ctrlr_device; 1495 size_t req_len; 1496 1497 pthread_mutex_lock(&g_cuse_mtx); 1498 1499 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1500 if (!ctrlr_device) { 1501 pthread_mutex_unlock(&g_cuse_mtx); 1502 return -ENODEV; 1503 } 1504 1505 req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name)); 1506 if (*size < req_len) { 1507 *size = req_len; 1508 pthread_mutex_unlock(&g_cuse_mtx); 1509 return -ENOSPC; 1510 } 1511 snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name); 1512 1513 pthread_mutex_unlock(&g_cuse_mtx); 1514 1515 return 0; 1516 } 1517 1518 int 1519 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size) 1520 { 1521 struct cuse_device *ns_device; 1522 size_t req_len; 1523 1524 pthread_mutex_lock(&g_cuse_mtx); 1525 1526 ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid); 1527 if (!ns_device) { 1528 pthread_mutex_unlock(&g_cuse_mtx); 1529 return -ENODEV; 1530 } 1531 1532 req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name)); 1533 if 
(*size < req_len) { 1534 *size = req_len; 1535 pthread_mutex_unlock(&g_cuse_mtx); 1536 return -ENOSPC; 1537 } 1538 snprintf(name, req_len + 1, "%s", ns_device->dev_name); 1539 1540 pthread_mutex_unlock(&g_cuse_mtx); 1541 1542 return 0; 1543 } 1544 1545 SPDK_LOG_REGISTER_COMPONENT(nvme_cuse) 1546