1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #define FUSE_USE_VERSION 31 35 36 #include <fuse3/cuse_lowlevel.h> 37 38 #include <linux/nvme_ioctl.h> 39 #include <linux/fs.h> 40 41 #include "nvme_internal.h" 42 #include "nvme_io_msg.h" 43 #include "nvme_cuse.h" 44 45 struct cuse_device { 46 bool is_started; 47 48 char dev_name[128]; 49 uint32_t index; 50 int claim_fd; 51 char lock_name[64]; 52 53 struct spdk_nvme_ctrlr *ctrlr; /**< NVMe controller */ 54 uint32_t nsid; /**< NVMe name space id, or 0 */ 55 56 pthread_t tid; 57 struct fuse_session *session; 58 59 struct cuse_device *ctrlr_device; 60 struct cuse_device *ns_devices; /**< Array of cuse ns devices */ 61 62 TAILQ_ENTRY(cuse_device) tailq; 63 }; 64 65 static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER; 66 static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head); 67 static struct spdk_bit_array *g_ctrlr_started; 68 69 struct cuse_io_ctx { 70 struct spdk_nvme_cmd nvme_cmd; 71 enum spdk_nvme_data_transfer data_transfer; 72 73 uint64_t lba; 74 uint32_t lba_count; 75 76 void *data; 77 int data_len; 78 79 fuse_req_t req; 80 }; 81 82 static void 83 cuse_io_ctx_free(struct cuse_io_ctx *ctx) 84 { 85 spdk_free(ctx->data); 86 free(ctx); 87 } 88 89 #define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val) \ 90 if (out_bufsz == 0) { \ 91 struct iovec out_iov; \ 92 out_iov.iov_base = (void *)arg; \ 93 out_iov.iov_len = sizeof(val); \ 94 fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1); \ 95 return; \ 96 } 97 98 static void 99 cuse_nvme_passthru_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl) 100 { 101 struct cuse_io_ctx *ctx = arg; 102 struct iovec out_iov[2]; 103 struct spdk_nvme_cpl _cpl; 104 105 if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER || 106 ctx->data_transfer == SPDK_NVME_DATA_NONE) { 107 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0); 108 } else { 109 memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl)); 110 111 out_iov[0].iov_base = &_cpl.cdw0; 112 out_iov[0].iov_len = sizeof(_cpl.cdw0); 113 114 if (ctx->data_len > 0) { 115 out_iov[1].iov_base = ctx->data; 116 out_iov[1].iov_len = ctx->data_len; 117 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2); 118 } else { 119 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1); 120 } 121 } 122 123 cuse_io_ctx_free(ctx); 124 } 125 126 static void 127 cuse_nvme_passthru_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 128 { 129 int rc; 130 struct cuse_io_ctx *ctx = arg; 131 132 if (nsid != 0) { 133 rc = spdk_nvme_ctrlr_cmd_io_raw(ctrlr, ctrlr->external_io_msgs_qpair, &ctx->nvme_cmd, ctx->data, 134 ctx->data_len, cuse_nvme_passthru_cmd_cb, (void *)ctx); 135 } else { 136 rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len, 137 cuse_nvme_passthru_cmd_cb, (void *)ctx); 138 } 139 if (rc < 0) { 140 fuse_reply_err(ctx->req, EINVAL); 141 cuse_io_ctx_free(ctx); 142 } 143 } 144 145 static void 146 cuse_nvme_passthru_cmd_send(fuse_req_t req, struct nvme_passthru_cmd *passthru_cmd, 147 const void *data, int cmd) 148 { 149 struct cuse_io_ctx *ctx; 150 struct cuse_device *cuse_device = fuse_req_userdata(req); 151 int rv; 152 153 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 154 if (!ctx) { 155 SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n"); 156 fuse_reply_err(req, ENOMEM); 157 return; 158 } 159 160 ctx->req = req; 161 ctx->data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode); 162 163 memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd)); 164 ctx->nvme_cmd.opc = passthru_cmd->opcode; 165 ctx->nvme_cmd.nsid = passthru_cmd->nsid; 166 ctx->nvme_cmd.cdw10 = passthru_cmd->cdw10; 167 ctx->nvme_cmd.cdw11 = passthru_cmd->cdw11; 168 ctx->nvme_cmd.cdw12 = passthru_cmd->cdw12; 169 ctx->nvme_cmd.cdw13 = passthru_cmd->cdw13; 170 ctx->nvme_cmd.cdw14 = passthru_cmd->cdw14; 171 ctx->nvme_cmd.cdw15 = passthru_cmd->cdw15; 172 173 ctx->data_len = passthru_cmd->data_len; 174 175 if (ctx->data_len > 0) { 176 ctx->data = spdk_malloc(ctx->data_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 177 if (!ctx->data) { 178 SPDK_ERRLOG("Cannot allocate memory for data\n"); 179 fuse_reply_err(req, ENOMEM); 180 free(ctx); 181 return; 182 } 183 if (data != NULL) { 184 memcpy(ctx->data, data, ctx->data_len); 185 } 186 } 187 188 if ((unsigned int)cmd != NVME_IOCTL_ADMIN_CMD) { 189 /* Send NS for IO IOCTLs */ 190 rv = nvme_io_msg_send(cuse_device->ctrlr, passthru_cmd->nsid, cuse_nvme_passthru_cmd_execute, ctx); 191 } else { 192 /* NS == 0 for Admin IOCTLs */ 193 rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_passthru_cmd_execute, ctx); 194 } 195 if (rv) { 196 SPDK_ERRLOG("Cannot send io msg to the controller\n"); 197 fuse_reply_err(req, -rv); 198 cuse_io_ctx_free(ctx); 199 return; 200 } 201 } 202 203 static void 204 cuse_nvme_passthru_cmd(fuse_req_t req, int cmd, void *arg, 205 struct fuse_file_info *fi, unsigned flags, 206 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 207 { 208 struct nvme_passthru_cmd *passthru_cmd; 209 struct iovec in_iov[2], out_iov[2]; 210 211 in_iov[0].iov_base = (void *)arg; 212 in_iov[0].iov_len = sizeof(*passthru_cmd); 213 if (in_bufsz == 0) { 214 fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0); 215 return; 216 } 217 218 passthru_cmd = (struct nvme_passthru_cmd *)in_buf; 219 220 switch (spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode)) { 221 case SPDK_NVME_DATA_HOST_TO_CONTROLLER: 222 if (passthru_cmd->addr != 0) { 223 in_iov[1].iov_base = (void *)passthru_cmd->addr; 224 in_iov[1].iov_len = passthru_cmd->data_len; 225 if (in_bufsz == sizeof(*passthru_cmd)) { 226 fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0); 227 return; 228 } 229 cuse_nvme_passthru_cmd_send(req, passthru_cmd, in_buf + sizeof(*passthru_cmd), cmd); 230 } else { 231 cuse_nvme_passthru_cmd_send(req, passthru_cmd, NULL, cmd); 232 } 233 return; 234 case SPDK_NVME_DATA_NONE: 235 case SPDK_NVME_DATA_CONTROLLER_TO_HOST: 236 if (out_bufsz == 0) { 237 out_iov[0].iov_base = &((struct nvme_passthru_cmd *)arg)->result; 238 out_iov[0].iov_len = sizeof(uint32_t); 239 if (passthru_cmd->data_len > 0) { 240 out_iov[1].iov_base = (void *)passthru_cmd->addr; 241 out_iov[1].iov_len = passthru_cmd->data_len; 242 fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2); 243 } else { 244 fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1); 245 } 246 return; 247 } 248 249 cuse_nvme_passthru_cmd_send(req, passthru_cmd, NULL, cmd); 250 251 return; 252 case SPDK_NVME_DATA_BIDIRECTIONAL: 253 fuse_reply_err(req, EINVAL); 254 return; 255 } 256 } 257 258 static void 259 cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 260 { 261 int rc; 262 fuse_req_t req = arg; 263 264 rc = spdk_nvme_ctrlr_reset(ctrlr); 265 if (rc) { 266 fuse_reply_err(req, rc); 267 return; 268 } 269 270 fuse_reply_ioctl_iov(req, 0, NULL, 0); 271 } 272 273 static void 274 cuse_nvme_reset(fuse_req_t req, int cmd, void *arg, 275 struct fuse_file_info *fi, unsigned flags, 276 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 277 { 278 int rv; 279 struct cuse_device *cuse_device = fuse_req_userdata(req); 280 281 if (cuse_device->nsid) { 282 SPDK_ERRLOG("Namespace reset not supported\n"); 283 fuse_reply_err(req, EINVAL); 284 return; 285 } 286 287 rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req); 288 if (rv) { 289 SPDK_ERRLOG("Cannot send reset\n"); 290 fuse_reply_err(req, EINVAL); 291 } 292 } 293 294 /***************************************************************************** 295 * Namespace IO requests 296 */ 297 298 static void 299 cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl) 300 { 301 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 302 303 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0); 304 305 cuse_io_ctx_free(ctx); 306 } 307 308 static void 309 cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 310 { 311 int rc; 312 struct cuse_io_ctx *ctx = arg; 313 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 314 315 rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data, 316 ctx->lba, /* LBA start */ 317 ctx->lba_count, /* number of LBAs */ 318 cuse_nvme_submit_io_write_done, ctx, 0); 319 320 if (rc != 0) { 321 SPDK_ERRLOG("write failed: rc = %d\n", rc); 322 fuse_reply_err(ctx->req, rc); 323 cuse_io_ctx_free(ctx); 324 return; 325 } 326 } 327 328 static void 329 cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg, 330 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, 331 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 332 { 333 const struct nvme_user_io *user_io = in_buf; 334 struct cuse_io_ctx *ctx; 335 int rc; 336 337 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 338 if (!ctx) { 339 SPDK_ERRLOG("Cannot allocate memory for context\n"); 340 fuse_reply_err(req, ENOMEM); 341 return; 342 } 343 344 ctx->req = req; 345 ctx->lba = user_io->slba; 346 ctx->lba_count = user_io->nblocks + 1; 347 ctx->data_len = ctx->lba_count * block_size; 348 349 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, 350 SPDK_MALLOC_DMA); 351 if (ctx->data == NULL) { 352 SPDK_ERRLOG("Write buffer allocation failed\n"); 353 fuse_reply_err(ctx->req, ENOMEM); 354 free(ctx); 355 return; 356 } 357 358 memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len); 359 360 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb, 361 ctx); 362 if (rc < 0) { 363 SPDK_ERRLOG("Cannot send write io\n"); 364 fuse_reply_err(ctx->req, rc); 365 cuse_io_ctx_free(ctx); 366 } 367 } 368 369 static void 370 cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl) 371 { 372 struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref; 373 struct iovec iov; 374 375 iov.iov_base = ctx->data; 376 iov.iov_len = ctx->data_len; 377 378 fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1); 379 380 cuse_io_ctx_free(ctx); 381 } 382 383 static void 384 cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg) 385 { 386 int rc; 387 struct cuse_io_ctx *ctx = arg; 388 struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 389 390 rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data, 391 ctx->lba, /* LBA start */ 392 ctx->lba_count, /* number of LBAs */ 393 cuse_nvme_submit_io_read_done, ctx, 0); 394 395 if (rc != 0) { 396 SPDK_ERRLOG("read failed: rc = %d\n", rc); 397 fuse_reply_err(ctx->req, rc); 398 cuse_io_ctx_free(ctx); 399 return; 400 } 401 } 402 403 static void 404 cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg, 405 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, 406 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 407 { 408 int rc; 409 struct cuse_io_ctx *ctx; 410 const struct nvme_user_io *user_io = in_buf; 411 412 ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx)); 413 if (!ctx) { 414 SPDK_ERRLOG("Cannot allocate memory for context\n"); 415 fuse_reply_err(req, ENOMEM); 416 return; 417 } 418 419 ctx->req = req; 420 ctx->lba = user_io->slba; 421 ctx->lba_count = user_io->nblocks + 1; 422 423 ctx->data_len = ctx->lba_count * block_size; 424 ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, 425 SPDK_MALLOC_DMA); 426 if (ctx->data == NULL) { 427 SPDK_ERRLOG("Read buffer allocation failed\n"); 428 fuse_reply_err(ctx->req, ENOMEM); 429 free(ctx); 430 return; 431 } 432 433 rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx); 434 if (rc < 0) { 435 SPDK_ERRLOG("Cannot send read io\n"); 436 fuse_reply_err(ctx->req, rc); 437 cuse_io_ctx_free(ctx); 438 } 439 } 440 441 442 static void 443 cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg, 444 struct fuse_file_info *fi, unsigned flags, 445 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 446 { 447 const struct nvme_user_io *user_io; 448 struct iovec in_iov[2], out_iov; 449 struct cuse_device *cuse_device = fuse_req_userdata(req); 450 struct spdk_nvme_ns *ns; 451 uint32_t block_size; 452 453 in_iov[0].iov_base = (void *)arg; 454 in_iov[0].iov_len = sizeof(*user_io); 455 if (in_bufsz == 0) { 456 fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0); 457 return; 458 } 459 460 user_io = in_buf; 461 462 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 463 block_size = spdk_nvme_ns_get_sector_size(ns); 464 465 switch (user_io->opcode) { 466 case SPDK_NVME_OPC_READ: 467 out_iov.iov_base = (void *)user_io->addr; 468 out_iov.iov_len = (user_io->nblocks + 1) * block_size; 469 if (out_bufsz == 0) { 470 fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1); 471 return; 472 } 473 474 cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags, 475 block_size, in_buf, in_bufsz, out_bufsz); 476 break; 477 case SPDK_NVME_OPC_WRITE: 478 in_iov[1].iov_base = (void *)user_io->addr; 479 in_iov[1].iov_len = (user_io->nblocks + 1) * block_size; 480 if (in_bufsz == sizeof(*user_io)) { 481 fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0); 482 return; 483 } 484 485 cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags, 486 block_size, in_buf, in_bufsz, out_bufsz); 487 break; 488 default: 489 SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode); 490 fuse_reply_err(req, EINVAL); 491 return; 492 } 493 494 } 495 496 /***************************************************************************** 497 * Other namespace IOCTLs 498 */ 499 static void 500 cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg, 501 struct fuse_file_info *fi, unsigned flags, 502 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 503 { 504 uint64_t size; 505 struct spdk_nvme_ns *ns; 506 struct cuse_device *cuse_device = fuse_req_userdata(req); 507 508 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 509 510 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 511 size = spdk_nvme_ns_get_num_sectors(ns); 512 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 513 } 514 515 static void 516 cuse_blkpbszget(fuse_req_t req, int cmd, void *arg, 517 struct fuse_file_info *fi, unsigned flags, 518 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 519 { 520 int pbsz; 521 struct spdk_nvme_ns *ns; 522 struct cuse_device *cuse_device = fuse_req_userdata(req); 523 524 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz); 525 526 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 527 pbsz = spdk_nvme_ns_get_sector_size(ns); 528 fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz)); 529 } 530 531 static void 532 cuse_blkgetsize(fuse_req_t req, int cmd, void *arg, 533 struct fuse_file_info *fi, unsigned flags, 534 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 535 { 536 long size; 537 struct spdk_nvme_ns *ns; 538 struct cuse_device *cuse_device = fuse_req_userdata(req); 539 540 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size); 541 542 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 543 544 /* return size in 512 bytes blocks */ 545 size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns); 546 fuse_reply_ioctl(req, 0, &size, sizeof(size)); 547 } 548 549 static void 550 cuse_blkgetsectorsize(fuse_req_t req, int cmd, void *arg, 551 struct fuse_file_info *fi, unsigned flags, 552 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 553 { 554 int ssize; 555 struct spdk_nvme_ns *ns; 556 struct cuse_device *cuse_device = fuse_req_userdata(req); 557 558 FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, ssize); 559 560 ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid); 561 ssize = spdk_nvme_ns_get_sector_size(ns); 562 fuse_reply_ioctl(req, 0, &ssize, sizeof(ssize)); 563 } 564 565 static void 566 cuse_getid(fuse_req_t req, int cmd, void *arg, 567 struct fuse_file_info *fi, unsigned flags, 568 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 569 { 570 struct cuse_device *cuse_device = fuse_req_userdata(req); 571 572 fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0); 573 } 574 575 static void 576 cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg, 577 struct fuse_file_info *fi, unsigned flags, 578 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 579 { 580 if (flags & FUSE_IOCTL_COMPAT) { 581 fuse_reply_err(req, ENOSYS); 582 return; 583 } 584 585 switch ((unsigned int)cmd) { 586 case NVME_IOCTL_ADMIN_CMD: 587 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n"); 588 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 589 break; 590 591 case NVME_IOCTL_RESET: 592 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESET\n"); 593 cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 594 break; 595 596 default: 597 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 598 fuse_reply_err(req, EINVAL); 599 } 600 } 601 602 static void 603 cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg, 604 struct fuse_file_info *fi, unsigned flags, 605 const void *in_buf, size_t in_bufsz, size_t out_bufsz) 606 { 607 if (flags & FUSE_IOCTL_COMPAT) { 608 fuse_reply_err(req, ENOSYS); 609 return; 610 } 611 612 switch ((unsigned int)cmd) { 613 case NVME_IOCTL_ADMIN_CMD: 614 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n"); 615 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 616 break; 617 618 case NVME_IOCTL_SUBMIT_IO: 619 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBMIT_IO\n"); 620 cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 621 break; 622 623 case NVME_IOCTL_IO_CMD: 624 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_IO_CMD\n"); 625 cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 626 break; 627 628 case NVME_IOCTL_ID: 629 SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ID\n"); 630 cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 631 break; 632 633 case BLKPBSZGET: 634 SPDK_DEBUGLOG(nvme_cuse, "BLKPBSZGET\n"); 635 cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 636 break; 637 638 case BLKSSZGET: 639 SPDK_DEBUGLOG(nvme_cuse, "BLKSSZGET\n"); 640 cuse_blkgetsectorsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 641 break; 642 643 case BLKGETSIZE: 644 SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE\n"); 645 /* Returns the device size as a number of 512-byte blocks (returns pointer to long) */ 646 cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 647 break; 648 649 case BLKGETSIZE64: 650 SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE64\n"); 651 /* Returns the device size in sectors (returns pointer to uint64_t) */ 652 cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz); 653 break; 654 655 default: 656 SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd); 657 fuse_reply_err(req, EINVAL); 658 } 659 } 660 661 /***************************************************************************** 662 * CUSE threads initialization. 663 */ 664 665 static void cuse_open(fuse_req_t req, struct fuse_file_info *fi) 666 { 667 fuse_reply_open(req, fi); 668 } 669 670 static const struct cuse_lowlevel_ops cuse_ctrlr_clop = { 671 .open = cuse_open, 672 .ioctl = cuse_ctrlr_ioctl, 673 }; 674 675 static const struct cuse_lowlevel_ops cuse_ns_clop = { 676 .open = cuse_open, 677 .ioctl = cuse_ns_ioctl, 678 }; 679 680 static void * 681 cuse_thread(void *arg) 682 { 683 struct cuse_device *cuse_device = arg; 684 char *cuse_argv[] = { "cuse", "-f" }; 685 int cuse_argc = SPDK_COUNTOF(cuse_argv); 686 char devname_arg[128 + 8]; 687 const char *dev_info_argv[] = { devname_arg }; 688 struct cuse_info ci; 689 int multithreaded; 690 int rc; 691 struct fuse_buf buf = { .mem = NULL }; 692 struct pollfd fds; 693 int timeout_msecs = 500; 694 695 spdk_unaffinitize_thread(); 696 697 snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name); 698 699 memset(&ci, 0, sizeof(ci)); 700 ci.dev_info_argc = 1; 701 ci.dev_info_argv = dev_info_argv; 702 ci.flags = CUSE_UNRESTRICTED_IOCTL; 703 704 if (cuse_device->nsid) { 705 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop, 706 &multithreaded, cuse_device); 707 } else { 708 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop, 709 &multithreaded, cuse_device); 710 } 711 if (!cuse_device->session) { 712 SPDK_ERRLOG("Cannot create cuse session\n"); 713 goto err; 714 } 715 716 SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name); 717 718 /* Receive and process fuse requests */ 719 fds.fd = fuse_session_fd(cuse_device->session); 720 fds.events = POLLIN; 721 while (!fuse_session_exited(cuse_device->session)) { 722 rc = poll(&fds, 1, timeout_msecs); 723 if (rc <= 0) { 724 continue; 725 } 726 rc = fuse_session_receive_buf(cuse_device->session, &buf); 727 if (rc > 0) { 728 fuse_session_process_buf(cuse_device->session, &buf); 729 } 730 } 731 free(buf.mem); 732 fuse_session_reset(cuse_device->session); 733 cuse_lowlevel_teardown(cuse_device->session); 734 err: 735 pthread_exit(NULL); 736 } 737 738 /***************************************************************************** 739 * CUSE devices management 740 */ 741 742 static int 743 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid) 744 { 745 struct cuse_device *ns_device; 746 int rv; 747 748 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 749 if (ns_device->is_started) { 750 return 0; 751 } 752 753 ns_device->ctrlr = ctrlr_device->ctrlr; 754 ns_device->ctrlr_device = ctrlr_device; 755 ns_device->nsid = nsid; 756 rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d", 757 ctrlr_device->dev_name, ns_device->nsid); 758 if (rv < 0) { 759 SPDK_ERRLOG("Device name too long.\n"); 760 free(ns_device); 761 return -ENAMETOOLONG; 762 } 763 764 rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device); 765 if (rv != 0) { 766 SPDK_ERRLOG("pthread_create failed\n"); 767 return -rv; 768 } 769 770 ns_device->is_started = true; 771 772 return 0; 773 } 774 775 static void 776 cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid) 777 { 778 struct cuse_device *ns_device; 779 780 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 781 if (!ns_device->is_started) { 782 return; 783 } 784 785 fuse_session_exit(ns_device->session); 786 pthread_join(ns_device->tid, NULL); 787 ns_device->is_started = false; 788 } 789 790 static int 791 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index) 792 { 793 int dev_fd; 794 int pid; 795 void *dev_map; 796 struct flock cusedev_lock = { 797 .l_type = F_WRLCK, 798 .l_whence = SEEK_SET, 799 .l_start = 0, 800 .l_len = 0, 801 }; 802 803 snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name), 804 "/var/tmp/spdk_nvme_cuse_lock_%" PRIu32, index); 805 806 dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 807 if (dev_fd == -1) { 808 SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name); 809 return -errno; 810 } 811 812 if (ftruncate(dev_fd, sizeof(int)) != 0) { 813 SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name); 814 close(dev_fd); 815 return -errno; 816 } 817 818 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 819 MAP_SHARED, dev_fd, 0); 820 if (dev_map == MAP_FAILED) { 821 SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno); 822 close(dev_fd); 823 return -errno; 824 } 825 826 if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) { 827 pid = *(int *)dev_map; 828 SPDK_ERRLOG("Cannot create lock on device %s, probably" 829 " process %d has claimed it\n", ctrlr_device->lock_name, pid); 830 munmap(dev_map, sizeof(int)); 831 close(dev_fd); 832 /* F_SETLK returns unspecified errnos, normalize them */ 833 return -EACCES; 834 } 835 836 *(int *)dev_map = (int)getpid(); 837 munmap(dev_map, sizeof(int)); 838 ctrlr_device->claim_fd = dev_fd; 839 ctrlr_device->index = index; 840 /* Keep dev_fd open to maintain the lock. */ 841 return 0; 842 } 843 844 static void 845 nvme_cuse_unclaim(struct cuse_device *ctrlr_device) 846 { 847 close(ctrlr_device->claim_fd); 848 ctrlr_device->claim_fd = -1; 849 unlink(ctrlr_device->lock_name); 850 } 851 852 static void 853 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device) 854 { 855 uint32_t i; 856 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 857 858 for (i = 1; i <= num_ns; i++) { 859 cuse_nvme_ns_stop(ctrlr_device, i); 860 } 861 862 fuse_session_exit(ctrlr_device->session); 863 pthread_join(ctrlr_device->tid, NULL); 864 TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq); 865 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 866 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 867 spdk_bit_array_free(&g_ctrlr_started); 868 } 869 nvme_cuse_unclaim(ctrlr_device); 870 free(ctrlr_device->ns_devices); 871 free(ctrlr_device); 872 } 873 874 static int 875 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device) 876 { 877 uint32_t nsid; 878 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 879 880 for (nsid = 1; nsid <= num_ns; nsid++) { 881 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) { 882 cuse_nvme_ns_stop(ctrlr_device, nsid); 883 continue; 884 } 885 886 if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) { 887 SPDK_ERRLOG("Cannot start CUSE namespace device."); 888 return -1; 889 } 890 } 891 892 return 0; 893 } 894 895 static int 896 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr) 897 { 898 int rv = 0; 899 struct cuse_device *ctrlr_device; 900 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 901 902 SPDK_NOTICELOG("Creating cuse device for controller\n"); 903 904 if (g_ctrlr_started == NULL) { 905 g_ctrlr_started = spdk_bit_array_create(128); 906 if (g_ctrlr_started == NULL) { 907 SPDK_ERRLOG("Cannot create bit array\n"); 908 return -ENOMEM; 909 } 910 } 911 912 ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device)); 913 if (!ctrlr_device) { 914 SPDK_ERRLOG("Cannot allocate memory for ctrlr_device."); 915 rv = -ENOMEM; 916 goto err2; 917 } 918 919 ctrlr_device->ctrlr = ctrlr; 920 921 /* Check if device already exists, if not increment index until success */ 922 ctrlr_device->index = 0; 923 while (1) { 924 ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index); 925 if (ctrlr_device->index == UINT32_MAX) { 926 SPDK_ERRLOG("Too many registered controllers\n"); 927 goto err2; 928 } 929 930 if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) { 931 break; 932 } 933 ctrlr_device->index++; 934 } 935 spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index); 936 snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d", 937 ctrlr_device->index); 938 939 rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device); 940 if (rv != 0) { 941 SPDK_ERRLOG("pthread_create failed\n"); 942 rv = -rv; 943 goto err3; 944 } 945 TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq); 946 947 ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device)); 948 /* Start all active namespaces */ 949 if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) { 950 SPDK_ERRLOG("Cannot start CUSE namespace devices."); 951 cuse_nvme_ctrlr_stop(ctrlr_device); 952 rv = -1; 953 goto err3; 954 } 955 956 return 0; 957 958 err3: 959 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 960 err2: 961 free(ctrlr_device); 962 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 963 spdk_bit_array_free(&g_ctrlr_started); 964 } 965 return rv; 966 } 967 968 static struct cuse_device * 969 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr) 970 { 971 struct cuse_device *ctrlr_device = NULL; 972 973 TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) { 974 if (ctrlr_device->ctrlr == ctrlr) { 975 break; 976 } 977 } 978 979 return ctrlr_device; 980 } 981 982 static struct cuse_device * 983 nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 984 { 985 struct cuse_device *ctrlr_device = NULL; 986 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 987 988 if (nsid < 1 || nsid > num_ns) { 989 return NULL; 990 } 991 992 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 993 if (!ctrlr_device) { 994 return NULL; 995 } 996 997 if (!ctrlr_device->ns_devices[nsid - 1].is_started) { 998 return NULL; 999 } 1000 1001 return &ctrlr_device->ns_devices[nsid - 1]; 1002 } 1003 1004 static void 1005 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr) 1006 { 1007 struct cuse_device *ctrlr_device; 1008 1009 pthread_mutex_lock(&g_cuse_mtx); 1010 1011 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1012 if (!ctrlr_device) { 1013 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1014 pthread_mutex_unlock(&g_cuse_mtx); 1015 return; 1016 } 1017 1018 cuse_nvme_ctrlr_stop(ctrlr_device); 1019 1020 pthread_mutex_unlock(&g_cuse_mtx); 1021 } 1022 1023 static void 1024 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr) 1025 { 1026 struct cuse_device *ctrlr_device; 1027 1028 pthread_mutex_lock(&g_cuse_mtx); 1029 1030 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1031 if (!ctrlr_device) { 1032 pthread_mutex_unlock(&g_cuse_mtx); 1033 return; 1034 } 1035 1036 cuse_nvme_ctrlr_update_namespaces(ctrlr_device); 1037 1038 pthread_mutex_unlock(&g_cuse_mtx); 1039 } 1040 1041 static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = { 1042 .name = "cuse", 1043 .stop = nvme_cuse_stop, 1044 .update = nvme_cuse_update, 1045 }; 1046 1047 int 1048 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr) 1049 { 1050 int rc; 1051 1052 rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer); 1053 if (rc) { 1054 return rc; 1055 } 1056 1057 pthread_mutex_lock(&g_cuse_mtx); 1058 1059 rc = nvme_cuse_start(ctrlr); 1060 if (rc) { 1061 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1062 } 1063 1064 pthread_mutex_unlock(&g_cuse_mtx); 1065 1066 return rc; 1067 } 1068 1069 int 1070 spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr) 1071 { 1072 struct cuse_device *ctrlr_device; 1073 1074 pthread_mutex_lock(&g_cuse_mtx); 1075 1076 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1077 if (!ctrlr_device) { 1078 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1079 pthread_mutex_unlock(&g_cuse_mtx); 1080 return -ENODEV; 1081 } 1082 1083 cuse_nvme_ctrlr_stop(ctrlr_device); 1084 1085 pthread_mutex_unlock(&g_cuse_mtx); 1086 1087 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1088 1089 return 0; 1090 } 1091 1092 void 1093 spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1094 { 1095 nvme_cuse_update(ctrlr); 1096 } 1097 1098 int 1099 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size) 1100 { 1101 struct cuse_device *ctrlr_device; 1102 size_t req_len; 1103 1104 pthread_mutex_lock(&g_cuse_mtx); 1105 1106 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1107 if (!ctrlr_device) { 1108 pthread_mutex_unlock(&g_cuse_mtx); 1109 return -ENODEV; 1110 } 1111 1112 req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name)); 1113 if (*size < req_len) { 1114 *size = req_len; 1115 pthread_mutex_unlock(&g_cuse_mtx); 1116 return -ENOSPC; 1117 } 1118 snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name); 1119 1120 pthread_mutex_unlock(&g_cuse_mtx); 1121 1122 return 0; 1123 } 1124 1125 int 1126 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size) 1127 { 1128 struct cuse_device *ns_device; 1129 size_t req_len; 1130 1131 pthread_mutex_lock(&g_cuse_mtx); 1132 1133 ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid); 1134 if (!ns_device) { 1135 pthread_mutex_unlock(&g_cuse_mtx); 1136 return -ENODEV; 1137 } 1138 1139 req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name)); 1140 if (*size < req_len) { 1141 *size = req_len; 1142 pthread_mutex_unlock(&g_cuse_mtx); 1143 return -ENOSPC; 1144 } 1145 snprintf(name, req_len + 1, "%s", ns_device->dev_name); 1146 1147 pthread_mutex_unlock(&g_cuse_mtx); 1148 1149 return 0; 1150 } 1151 1152 SPDK_LOG_REGISTER_COMPONENT(nvme_cuse) 1153