1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #define FUSE_USE_VERSION 31 35 36 #include <fuse3/cuse_lowlevel.h> 37 38 #include <linux/nvme_ioctl.h> 39 #include <linux/fs.h> 40 41 #include "nvme_internal.h" 42 #include "nvme_io_msg.h" 43 #include "nvme_cuse.h" 44 45 struct cuse_device { 46 bool is_started; 47 48 char dev_name[128]; 49 uint32_t index; 50 int claim_fd; 51 char lock_name[64]; 52 53 struct spdk_nvme_ctrlr *ctrlr; /**< NVMe controller */ 54 uint32_t nsid; /**< NVMe name space id, or 0 */ 55 56 pthread_t tid; 57 struct fuse_session *session; 58 59 struct cuse_device *ctrlr_device; 60 struct cuse_device *ns_devices; /**< Array of cuse ns devices */ 61 62 TAILQ_ENTRY(cuse_device) tailq; 63 }; 64 65 static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER; 66 static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head); 67 static struct spdk_bit_array *g_ctrlr_started; 68 69 struct cuse_io_ctx { 70 struct spdk_nvme_cmd nvme_cmd; 71 enum spdk_nvme_data_transfer data_transfer; 72 73 uint64_t lba; 74 uint32_t lba_count; 75 76 void *data; 77 int data_len; 78 79 fuse_req_t req; 80 }; 81 82 static void 83 cuse_io_ctx_free(struct cuse_io_ctx *ctx) 84 { 85 spdk_free(ctx->data); 86 free(ctx); 87 } 88 89 #define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val) \ 90 if (out_bufsz == 0) { \ 91 struct iovec out_iov; \ 92 out_iov.iov_base = (void *)arg; \ 93 out_iov.iov_len = sizeof(val); \ 94 fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1); \ 95 return; \ 96 } 97 98 static void 99 cuse_nvme_admin_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl) 100 { 101 struct cuse_io_ctx *ctx = arg; 102 struct iovec out_iov[2]; 103 struct spdk_nvme_cpl _cpl; 104 105 if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 106 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0); 107 } else { 108 memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl)); 109 110 out_iov[0].iov_base = &_cpl.cdw0; 111 out_iov[0].iov_len = sizeof(_cpl.cdw0); 112 113 if (ctx->data_len > 0) { 114 out_iov[1].iov_base = ctx->data; 115 out_iov[1].iov_len = ctx->data_len; 116 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2); 117 } else { 118 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1); 119 } 120 } 121 122 cuse_io_ctx_free(ctx); 123 } 124 125 static void 126 cuse_nvme_admin_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 127 { 128 int rc; 129 struct cuse_io_ctx *ctx = arg; 130 131 rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len, 132 cuse_nvme_admin_cmd_cb, (void *)ctx); 133 if (rc < 0) { 134 fuse_reply_err(ctx->req, EINVAL); 135 cuse_io_ctx_free(ctx); 136 } 137 } 138 139 static void 140 cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd, 141 const void *data) 142 { 143 struct cuse_io_ctx *ctx; 144 struct cuse_device *cuse_device = fuse_req_userdata(req); 145 int rv; 146 147 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 148 if (!ctx) { 149 SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n"); 150 fuse_reply_err(req, ENOMEM); 151 return; 152 } 153 154 ctx->req = req; 155 ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode); 156 157 memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd)); 158 ctx->nvme_cmd.opc = admin_cmd->opcode; 159 ctx->nvme_cmd.nsid = admin_cmd->nsid; 160 ctx->nvme_cmd.cdw10 = admin_cmd->cdw10; 161 ctx->nvme_cmd.cdw11 = admin_cmd->cdw11; 162 ctx->nvme_cmd.cdw12 = admin_cmd->cdw12; 163 ctx->nvme_cmd.cdw13 = admin_cmd->cdw13; 164 ctx->nvme_cmd.cdw14 = admin_cmd->cdw14; 165 ctx->nvme_cmd.cdw15 = admin_cmd->cdw15; 166 167 ctx->data_len = admin_cmd->data_len; 168 169 if (ctx->data_len > 0) { 170 ctx->data = spdk_malloc(ctx->data_len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 171 if (!ctx->data) { 172 SPDK_ERRLOG("Cannot allocate memory for data\n"); 173 fuse_reply_err(req, ENOMEM); 174 free(ctx); 175 return; 176 } 177 if (data != NULL) { 178 memcpy(ctx->data, data, ctx->data_len); 179 } 180 } 181 182 rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_admin_cmd_execute, ctx); 183 if (rv) { 184 SPDK_ERRLOG("Cannot send io msg to the controller\n"); 185 fuse_reply_err(req, -rv); 186 cuse_io_ctx_free(ctx); 187 return; 188 } 189 } 190 191 static void 192 cuse_nvme_admin_cmd(fuse_req_t req, int cmd, void *arg, 193 struct fuse_file_info *fi, unsigned flags, 194 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 195 { 196 struct nvme_admin_cmd *admin_cmd; 197 struct iovec in_iov[2], out_iov[2]; 198 199 in_iov[0].iov_base = (void *)arg; 200 in_iov[0].iov_len = sizeof(*admin_cmd); 201 if (in_bufsz == 0) { 202 fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0); 203 return; 204 } 205 206 admin_cmd = (struct nvme_admin_cmd *)in_buf; 207 208 switch (spdk_nvme_opc_get_data_transfer(admin_cmd->opcode)) { 209 case SPDK_NVME_DATA_NONE: 210 SPDK_ERRLOG("SPDK_NVME_DATA_NONE not implemented\n"); 211 fuse_reply_err(req, EINVAL); 212 return; 213 case SPDK_NVME_DATA_HOST_TO_CONTROLLER: 214 if (admin_cmd->addr != 0) { 215 in_iov[1].iov_base = (void *)admin_cmd->addr; 216 in_iov[1].iov_len = admin_cmd->data_len; 217 if (in_bufsz == sizeof(*admin_cmd)) { 218 fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0); 219 return; 220 } 221 cuse_nvme_admin_cmd_send(req, admin_cmd, in_buf + sizeof(*admin_cmd)); 222 } else { 223 cuse_nvme_admin_cmd_send(req, admin_cmd, NULL); 224 } 225 return; 226 case SPDK_NVME_DATA_CONTROLLER_TO_HOST: 227 if (out_bufsz == 0) { 228 out_iov[0].iov_base = &((struct nvme_admin_cmd *)arg)->result; 229 out_iov[0].iov_len = sizeof(uint32_t); 230 if (admin_cmd->data_len > 0) { 231 out_iov[1].iov_base = (void *)admin_cmd->addr; 232 out_iov[1].iov_len = admin_cmd->data_len; 233 fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2); 234 } else { 235 fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1); 236 } 237 return; 238 } 239 240 cuse_nvme_admin_cmd_send(req, admin_cmd, NULL); 241 242 return; 243 case SPDK_NVME_DATA_BIDIRECTIONAL: 244 fuse_reply_err(req, EINVAL); 245 return; 246 } 247 } 248 249 static void 250 cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 251 { 252 int rc; 253 fuse_req_t req = arg; 254 255 rc = spdk_nvme_ctrlr_reset(ctrlr); 256 if (rc) { 257 fuse_reply_err(req, rc); 258 return; 259 } 260 261 fuse_reply_ioctl_iov(req, 0, NULL, 0); 262 } 263 264 static void 265 cuse_nvme_reset(fuse_req_t req, int cmd, void *arg, 266 struct fuse_file_info *fi, unsigned flags, 267 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 268 { 269 int rv; 270 struct cuse_device *cuse_device = fuse_req_userdata(req); 271 272 if (cuse_device->nsid) { 273 SPDK_ERRLOG("Namespace reset not supported\n"); 274 fuse_reply_err(req, EINVAL); 275 return; 276 } 277 278 rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req); 279 if (rv) { 280 SPDK_ERRLOG("Cannot send reset\n"); 281 fuse_reply_err(req, EINVAL); 282 } 283 } 284 285 /***************************************************************************** 286 * Namespace IO requests 287 */ 288 289 static void 290 cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl) 291 { 292 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 293 294 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0); 295 296 cuse_io_ctx_free(ctx); 297 } 298 299 static void 300 cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 301 { 302 int rc; 303 struct cuse_io_ctx *ctx = arg; 304 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 305 306 rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data, 307 ctx->lba, /* LBA start */ 308 ctx->lba_count, /* number of LBAs */ 309 cuse_nvme_submit_io_write_done, ctx, 0); 310 311 if (rc != 0) { 312 SPDK_ERRLOG("write failed: rc = %d\n", rc); 313 fuse_reply_err(ctx->req, rc); 314 cuse_io_ctx_free(ctx); 315 return; 316 } 317 } 318 319 static void 320 cuse_nvme_submit_io_write(fuse_req_t req, int cmd, void *arg, 321 struct fuse_file_info *fi, unsigned flags, 322 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 323 { 324 const struct nvme_user_io *user_io = in_buf; 325 struct cuse_io_ctx *ctx; 326 struct spdk_nvme_ns *ns; 327 uint32_t block_size; 328 int rc; 329 struct cuse_device *cuse_device = fuse_req_userdata(req); 330 331 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 332 if (!ctx) { 333 SPDK_ERRLOG("Cannot allocate memory for context\n"); 334 fuse_reply_err(req, ENOMEM); 335 return; 336 } 337 338 ctx->req = req; 339 340 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 341 block_size = spdk_nvme_ns_get_sector_size(ns); 342 343 ctx->lba = user_io->slba; 344 ctx->lba_count = user_io->nblocks + 1; 345 ctx->data_len = ctx->lba_count * block_size; 346 347 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, 348 SPDK_MALLOC_DMA); 349 if (ctx->data == NULL) { 350 SPDK_ERRLOG("Write buffer allocation failed\n"); 351 fuse_reply_err(ctx->req, ENOMEM); 352 free(ctx); 353 return; 354 } 355 356 memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len); 357 358 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb, 359 ctx); 360 if (rc < 0) { 361 SPDK_ERRLOG("Cannot send write io\n"); 362 fuse_reply_err(ctx->req, rc); 363 cuse_io_ctx_free(ctx); 364 } 365 } 366 367 static void 368 cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl) 369 { 370 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 371 struct iovec iov; 372 373 iov.iov_base = ctx->data; 374 iov.iov_len = ctx->data_len; 375 376 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1); 377 378 cuse_io_ctx_free(ctx); 379 } 380 381 static void 382 cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 383 { 384 int rc; 385 struct cuse_io_ctx *ctx = arg; 386 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 387 388 rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data, 389 ctx->lba, /* LBA start */ 390 ctx->lba_count, /* number of LBAs */ 391 cuse_nvme_submit_io_read_done, ctx, 0); 392 393 if (rc != 0) { 394 SPDK_ERRLOG("read failed: rc = %d\n", rc); 395 fuse_reply_err(ctx->req, rc); 396 cuse_io_ctx_free(ctx); 397 return; 398 } 399 } 400 401 static void 402 cuse_nvme_submit_io_read(fuse_req_t req, int cmd, void *arg, 403 struct fuse_file_info *fi, unsigned flags, 404 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 405 { 406 int rc; 407 struct cuse_io_ctx *ctx; 408 const struct nvme_user_io *user_io = in_buf; 409 struct cuse_device *cuse_device = fuse_req_userdata(req); 410 struct spdk_nvme_ns *ns; 411 uint32_t block_size; 412 413 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 414 if (!ctx) { 415 SPDK_ERRLOG("Cannot allocate memory for context\n"); 416 fuse_reply_err(req, ENOMEM); 417 return; 418 } 419 420 ctx->req = req; 421 ctx->lba = user_io->slba; 422 ctx->lba_count = user_io->nblocks; 423 424 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 425 block_size = spdk_nvme_ns_get_sector_size(ns); 426 427 ctx->data_len = ctx->lba_count * block_size; 428 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, 429 SPDK_MALLOC_DMA); 430 if (ctx->data == NULL) { 431 SPDK_ERRLOG("Read buffer allocation failed\n"); 432 fuse_reply_err(ctx->req, ENOMEM); 433 free(ctx); 434 return; 435 } 436 437 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx); 438 if (rc < 0) { 439 SPDK_ERRLOG("Cannot send read io\n"); 440 fuse_reply_err(ctx->req, rc); 441 cuse_io_ctx_free(ctx); 442 } 443 } 444 445 446 static void 447 cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg, 448 struct fuse_file_info *fi, unsigned flags, 449 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 450 { 451 const struct nvme_user_io *user_io; 452 struct iovec in_iov[2], out_iov; 453 454 in_iov[0].iov_base = (void *)arg; 455 in_iov[0].iov_len = sizeof(*user_io); 456 if (in_bufsz == 0) { 457 fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0); 458 return; 459 } 460 461 user_io = in_buf; 462 463 switch (user_io->opcode) { 464 case SPDK_NVME_OPC_READ: 465 out_iov.iov_base = (void *)user_io->addr; 466 out_iov.iov_len = (user_io->nblocks + 1) * 512; 467 if (out_bufsz == 0) { 468 fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1); 469 return; 470 } 471 472 cuse_nvme_submit_io_read(req, cmd, arg, fi, flags, in_buf, 473 in_bufsz, out_bufsz); 474 break; 475 case SPDK_NVME_OPC_WRITE: 476 in_iov[1].iov_base = (void *)user_io->addr; 477 in_iov[1].iov_len = (user_io->nblocks + 1) * 512; 478 if (in_bufsz == sizeof(*user_io)) { 479 fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0); 480 return; 481 } 482 483 cuse_nvme_submit_io_write(req, cmd, arg, fi, flags, in_buf, 484 in_bufsz, out_bufsz); 485 486 break; 487 default: 488 SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode); 489 fuse_reply_err(req, EINVAL); 490 return; 491 } 492 493 } 494 495 /***************************************************************************** 496 * Other namespace IOCTLs 497 */ 498 static void 499 cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg, 500 struct fuse_file_info *fi, unsigned flags, 501 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 502 { 503 uint64_t size; 504 struct spdk_nvme_ns *ns; 505 struct cuse_device *cuse_device = fuse_req_userdata(req); 506 507 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 508 509 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 510 size = spdk_nvme_ns_get_num_sectors(ns); 511 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 512 } 513 514 static void 515 cuse_blkpbszget(fuse_req_t req, int cmd, void *arg, 516 struct fuse_file_info *fi, unsigned flags, 517 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 518 { 519 int pbsz; 520 struct spdk_nvme_ns *ns; 521 struct cuse_device *cuse_device = fuse_req_userdata(req); 522 523 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz); 524 525 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 526 pbsz = spdk_nvme_ns_get_sector_size(ns); 527 fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz)); 528 } 529 530 static void 531 cuse_blkgetsize(fuse_req_t req, int cmd, void *arg, 532 struct fuse_file_info *fi, unsigned flags, 533 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 534 { 535 long size; 536 struct spdk_nvme_ns *ns; 537 struct cuse_device *cuse_device = fuse_req_userdata(req); 538 539 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 540 541 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 542 543 /* return size in 512 bytes blocks */ 544 size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns); 545 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 546 } 547 548 static void 549 cuse_getid(fuse_req_t req, int cmd, void *arg, 550 struct fuse_file_info *fi, unsigned flags, 551 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 552 { 553 struct cuse_device *cuse_device = fuse_req_userdata(req); 554 555 fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0); 556 } 557 558 static void 559 cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg, 560 struct fuse_file_info *fi, unsigned flags, 561 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 562 { 563 if (flags & FUSE_IOCTL_COMPAT) { 564 fuse_reply_err(req, ENOSYS); 565 return; 566 } 567 568 switch (cmd) { 569 case NVME_IOCTL_ADMIN_CMD: 570 cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 571 break; 572 573 case NVME_IOCTL_RESET: 574 cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 575 break; 576 577 default: 578 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 579 fuse_reply_err(req, EINVAL); 580 } 581 } 582 583 static void 584 cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg, 585 struct fuse_file_info *fi, unsigned flags, 586 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 587 { 588 if (flags & FUSE_IOCTL_COMPAT) { 589 fuse_reply_err(req, ENOSYS); 590 return; 591 } 592 593 switch (cmd) { 594 case NVME_IOCTL_ADMIN_CMD: 595 cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 596 break; 597 598 case NVME_IOCTL_SUBMIT_IO: 599 cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 600 break; 601 602 case NVME_IOCTL_ID: 603 cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 604 break; 605 606 case BLKPBSZGET: 607 cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 608 break; 609 610 case BLKGETSIZE: 611 /* Returns the device size as a number of 512-byte blocks (returns pointer to long) */ 612 cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 613 break; 614 615 case BLKGETSIZE64: 616 /* Returns the device size in sectors (returns pointer to uint64_t) */ 617 cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 618 break; 619 620 default: 621 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 622 fuse_reply_err(req, EINVAL); 623 } 624 } 625 626 /***************************************************************************** 627 * CUSE threads initialization. 628 */ 629 630 static void cuse_open(fuse_req_t req, struct fuse_file_info *fi) 631 { 632 fuse_reply_open(req, fi); 633 } 634 635 static const struct cuse_lowlevel_ops cuse_ctrlr_clop = { 636 .open = cuse_open, 637 .ioctl = cuse_ctrlr_ioctl, 638 }; 639 640 static const struct cuse_lowlevel_ops cuse_ns_clop = { 641 .open = cuse_open, 642 .ioctl = cuse_ns_ioctl, 643 }; 644 645 static void * 646 cuse_thread(void *arg) 647 { 648 struct cuse_device *cuse_device = arg; 649 char *cuse_argv[] = { "cuse", "-f" }; 650 int cuse_argc = SPDK_COUNTOF(cuse_argv); 651 char devname_arg[128 + 8]; 652 const char *dev_info_argv[] = { devname_arg }; 653 struct cuse_info ci; 654 int multithreaded; 655 int rc; 656 struct fuse_buf buf = { .mem = NULL }; 657 struct pollfd fds; 658 int timeout_msecs = 500; 659 660 spdk_unaffinitize_thread(); 661 662 snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name); 663 664 memset(&ci, 0, sizeof(ci)); 665 ci.dev_info_argc = 1; 666 ci.dev_info_argv = dev_info_argv; 667 ci.flags = CUSE_UNRESTRICTED_IOCTL; 668 669 if (cuse_device->nsid) { 670 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop, 671 &multithreaded, cuse_device); 672 } else { 673 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop, 674 &multithreaded, cuse_device); 675 } 676 if (!cuse_device->session) { 677 SPDK_ERRLOG("Cannot create cuse session\n"); 678 goto err; 679 } 680 681 SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name); 682 683 /* Receive and process fuse requests */ 684 fds.fd = fuse_session_fd(cuse_device->session); 685 fds.events = POLLIN; 686 while (!fuse_session_exited(cuse_device->session)) { 687 rc = poll(&fds, 1, timeout_msecs); 688 if (rc <= 0) { 689 continue; 690 } 691 rc = fuse_session_receive_buf(cuse_device->session, &buf); 692 if (rc > 0) { 693 fuse_session_process_buf(cuse_device->session, &buf); 694 } 695 } 696 free(buf.mem); 697 fuse_session_reset(cuse_device->session); 698 cuse_lowlevel_teardown(cuse_device->session); 699 err: 700 pthread_exit(NULL); 701 } 702 703 /***************************************************************************** 704 * CUSE devices management 705 */ 706 707 static int 708 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid) 709 { 710 struct cuse_device *ns_device; 711 int rv; 712 713 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 714 if (ns_device->is_started) { 715 return 0; 716 } 717 718 ns_device->ctrlr = ctrlr_device->ctrlr; 719 ns_device->ctrlr_device = ctrlr_device; 720 ns_device->nsid = nsid; 721 rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d", 722 ctrlr_device->dev_name, ns_device->nsid); 723 if (rv < 0) { 724 SPDK_ERRLOG("Device name too long.\n"); 725 free(ns_device); 726 return -ENAMETOOLONG; 727 } 728 729 rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device); 730 if (rv != 0) { 731 SPDK_ERRLOG("pthread_create failed\n"); 732 return -rv; 733 } 734 735 ns_device->is_started = true; 736 737 return 0; 738 } 739 740 static void 741 cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid) 742 { 743 struct cuse_device *ns_device; 744 745 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 746 if (!ns_device->is_started) { 747 return; 748 } 749 750 fuse_session_exit(ns_device->session); 751 pthread_join(ns_device->tid, NULL); 752 ns_device->is_started = false; 753 } 754 755 static int 756 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index) 757 { 758 int dev_fd; 759 int pid; 760 void *dev_map; 761 struct flock cusedev_lock = { 762 .l_type = F_WRLCK, 763 .l_whence = SEEK_SET, 764 .l_start = 0, 765 .l_len = 0, 766 }; 767 768 snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name), 769 "/tmp/spdk_nvme_cuse_lock_%" PRIu32, index); 770 771 dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 772 if (dev_fd == -1) { 773 SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name); 774 return -errno; 775 } 776 777 if (ftruncate(dev_fd, sizeof(int)) != 0) { 778 SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name); 779 close(dev_fd); 780 return -errno; 781 } 782 783 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 784 MAP_SHARED, dev_fd, 0); 785 if (dev_map == MAP_FAILED) { 786 SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno); 787 close(dev_fd); 788 return -errno; 789 } 790 791 if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) { 792 pid = *(int *)dev_map; 793 SPDK_ERRLOG("Cannot create lock on device %s, probably" 794 " process %d has claimed it\n", ctrlr_device->lock_name, pid); 795 munmap(dev_map, sizeof(int)); 796 close(dev_fd); 797 /* F_SETLK returns unspecified errnos, normalize them */ 798 return -EACCES; 799 } 800 801 *(int *)dev_map = (int)getpid(); 802 munmap(dev_map, sizeof(int)); 803 ctrlr_device->claim_fd = dev_fd; 804 ctrlr_device->index = index; 805 /* Keep dev_fd open to maintain the lock. */ 806 return 0; 807 } 808 809 static void 810 nvme_cuse_unclaim(struct cuse_device *ctrlr_device) 811 { 812 close(ctrlr_device->claim_fd); 813 ctrlr_device->claim_fd = -1; 814 unlink(ctrlr_device->lock_name); 815 } 816 817 static void 818 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device) 819 { 820 uint32_t i; 821 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 822 823 for (i = 1; i <= num_ns; i++) { 824 cuse_nvme_ns_stop(ctrlr_device, i); 825 } 826 827 fuse_session_exit(ctrlr_device->session); 828 pthread_join(ctrlr_device->tid, NULL); 829 TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq); 830 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 831 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 832 spdk_bit_array_free(&g_ctrlr_started); 833 } 834 nvme_cuse_unclaim(ctrlr_device); 835 free(ctrlr_device->ns_devices); 836 free(ctrlr_device); 837 } 838 839 static int 840 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device) 841 { 842 uint32_t nsid; 843 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 844 845 for (nsid = 1; nsid <= num_ns; nsid++) { 846 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) { 847 cuse_nvme_ns_stop(ctrlr_device, nsid); 848 continue; 849 } 850 851 if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) { 852 SPDK_ERRLOG("Cannot start CUSE namespace device."); 853 return -1; 854 } 855 } 856 857 return 0; 858 } 859 860 static int 861 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr) 862 { 863 int rv = 0; 864 struct cuse_device *ctrlr_device; 865 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 866 867 SPDK_NOTICELOG("Creating cuse device for controller\n"); 868 869 if (g_ctrlr_started == NULL) { 870 g_ctrlr_started = spdk_bit_array_create(128); 871 if (g_ctrlr_started == NULL) { 872 SPDK_ERRLOG("Cannot create bit array\n"); 873 return -ENOMEM; 874 } 875 } 876 877 ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device)); 878 if (!ctrlr_device) { 879 SPDK_ERRLOG("Cannot allocate memory for ctrlr_device."); 880 rv = -ENOMEM; 881 goto err2; 882 } 883 884 ctrlr_device->ctrlr = ctrlr; 885 886 /* Check if device already exists, if not increment index until success */ 887 ctrlr_device->index = 0; 888 while (1) { 889 ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index); 890 if (ctrlr_device->index == UINT32_MAX) { 891 SPDK_ERRLOG("Too many registered controllers\n"); 892 goto err2; 893 } 894 895 if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) { 896 break; 897 } 898 ctrlr_device->index++; 899 } 900 spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index); 901 snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d", 902 ctrlr_device->index); 903 904 rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device); 905 if (rv != 0) { 906 SPDK_ERRLOG("pthread_create failed\n"); 907 rv = -rv; 908 goto err3; 909 } 910 TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq); 911 912 ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device)); 913 /* Start all active namespaces */ 914 if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) { 915 SPDK_ERRLOG("Cannot start CUSE namespace devices."); 916 cuse_nvme_ctrlr_stop(ctrlr_device); 917 rv = -1; 918 goto err3; 919 } 920 921 return 0; 922 923 err3: 924 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 925 err2: 926 free(ctrlr_device); 927 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 928 spdk_bit_array_free(&g_ctrlr_started); 929 } 930 return rv; 931 } 932 933 static struct cuse_device * 934 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr) 935 { 936 struct cuse_device *ctrlr_device = NULL; 937 938 TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) { 939 if (ctrlr_device->ctrlr == ctrlr) { 940 break; 941 } 942 } 943 944 return ctrlr_device; 945 } 946 947 static struct cuse_device * 948 nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 949 { 950 struct cuse_device *ctrlr_device = NULL; 951 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 952 953 if (nsid < 1 || nsid > num_ns) { 954 return NULL; 955 } 956 957 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 958 if (!ctrlr_device) { 959 return NULL; 960 } 961 962 if (!ctrlr_device->ns_devices[nsid - 1].is_started) { 963 return NULL; 964 } 965 966 return &ctrlr_device->ns_devices[nsid - 1]; 967 } 968 969 static void 970 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr) 971 { 972 struct cuse_device *ctrlr_device; 973 974 pthread_mutex_lock(&g_cuse_mtx); 975 976 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 977 if (!ctrlr_device) { 978 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 979 pthread_mutex_unlock(&g_cuse_mtx); 980 return; 981 } 982 983 cuse_nvme_ctrlr_stop(ctrlr_device); 984 985 pthread_mutex_unlock(&g_cuse_mtx); 986 } 987 988 static void 989 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr) 990 { 991 struct cuse_device *ctrlr_device; 992 993 pthread_mutex_lock(&g_cuse_mtx); 994 995 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 996 if (!ctrlr_device) { 997 pthread_mutex_unlock(&g_cuse_mtx); 998 return; 999 } 1000 1001 cuse_nvme_ctrlr_update_namespaces(ctrlr_device); 1002 1003 pthread_mutex_unlock(&g_cuse_mtx); 1004 } 1005 1006 static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = { 1007 .name = "cuse", 1008 .stop = nvme_cuse_stop, 1009 .update = nvme_cuse_update, 1010 }; 1011 1012 int 1013 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr) 1014 { 1015 int rc; 1016 1017 rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer); 1018 if (rc) { 1019 return rc; 1020 } 1021 1022 pthread_mutex_lock(&g_cuse_mtx); 1023 1024 rc = nvme_cuse_start(ctrlr); 1025 if (rc) { 1026 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1027 } 1028 1029 pthread_mutex_unlock(&g_cuse_mtx); 1030 1031 return rc; 1032 } 1033 1034 int 1035 spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr) 1036 { 1037 struct cuse_device *ctrlr_device; 1038 1039 pthread_mutex_lock(&g_cuse_mtx); 1040 1041 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1042 if (!ctrlr_device) { 1043 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1044 pthread_mutex_unlock(&g_cuse_mtx); 1045 return -ENODEV; 1046 } 1047 1048 cuse_nvme_ctrlr_stop(ctrlr_device); 1049 1050 pthread_mutex_unlock(&g_cuse_mtx); 1051 1052 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1053 1054 return 0; 1055 } 1056 1057 void 1058 spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1059 { 1060 nvme_cuse_update(ctrlr); 1061 } 1062 1063 int 1064 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size) 1065 { 1066 struct cuse_device *ctrlr_device; 1067 size_t req_len; 1068 1069 pthread_mutex_lock(&g_cuse_mtx); 1070 1071 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1072 if (!ctrlr_device) { 1073 pthread_mutex_unlock(&g_cuse_mtx); 1074 return -ENODEV; 1075 } 1076 1077 req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name)); 1078 if (*size < req_len) { 1079 *size = req_len; 1080 pthread_mutex_unlock(&g_cuse_mtx); 1081 return -ENOSPC; 1082 } 1083 snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name); 1084 1085 pthread_mutex_unlock(&g_cuse_mtx); 1086 1087 return 0; 1088 } 1089 1090 int 1091 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size) 1092 { 1093 struct cuse_device *ns_device; 1094 size_t req_len; 1095 1096 pthread_mutex_lock(&g_cuse_mtx); 1097 1098 ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid); 1099 if (!ns_device) { 1100 pthread_mutex_unlock(&g_cuse_mtx); 1101 return -ENODEV; 1102 } 1103 1104 req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name)); 1105 if (*size < req_len) { 1106 *size = req_len; 1107 pthread_mutex_unlock(&g_cuse_mtx); 1108 return -ENOSPC; 1109 } 1110 snprintf(name, req_len + 1, "%s", ns_device->dev_name); 1111 1112 pthread_mutex_unlock(&g_cuse_mtx); 1113 1114 return 0; 1115 } 1116