1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#define FUSE_USE_VERSION 31

#include <fuse3/cuse_lowlevel.h>

#include <linux/nvme_ioctl.h>
#include <linux/fs.h>

#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_cuse.h"

/*
 * Per-device CUSE state. One instance exists for the controller character
 * device (nsid == 0) and one for each namespace device (nsid >= 1).
 */
struct cuse_device {
	bool is_started;

	char dev_name[128];
	uint32_t index;
	int claim_fd;		/* fd kept open to hold the advisory lock on lock_name */
	char lock_name[64];

	struct spdk_nvme_ctrlr *ctrlr;		/**< NVMe controller */
	uint32_t nsid;				/**< NVMe name space id, or 0 */

	pthread_t tid;				/* thread running the fuse session loop */
	struct fuse_session *session;

	struct cuse_device *ctrlr_device;	/* back-pointer from a ns device to its controller device */
	struct cuse_device *ns_devices;		/**< Array of cuse ns devices */

	TAILQ_ENTRY(cuse_device) tailq;
};

/* Protects g_ctrlr_ctx_head and g_ctrlr_started. */
static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
/* One bit per registered controller index; lazily created, freed when empty. */
static struct spdk_bit_array *g_ctrlr_started;

/*
 * Context carried from the fuse ioctl handler, through nvme_io_msg_send(),
 * to the NVMe completion callback. Owns a DMA-able data buffer.
 */
struct cuse_io_ctx {
	struct spdk_nvme_cmd nvme_cmd;
	enum spdk_nvme_data_transfer data_transfer;

	uint64_t lba;
	uint32_t lba_count;

	void *data;	/* spdk_malloc/spdk_zmalloc'd DMA buffer, freed with spdk_free */
	int data_len;

	fuse_req_t req;	/* fuse request to reply to when the command completes */
};

/* Release an I/O context and its DMA buffer (spdk_free(NULL) is a no-op). */
static void
cuse_io_ctx_free(struct cuse_io_ctx *ctx)
{
	spdk_free(ctx->data);
	free(ctx);
}

/*
 * CUSE unrestricted-ioctl retry protocol helper: if the kernel has not yet
 * copied an output buffer for us (out_bufsz == 0), ask it to retry the ioctl
 * with room for sizeof(val) bytes and return from the CALLER.
 * NOTE(review): this macro expands to a bare `if` containing `return;` — it
 * must only be used as a full statement at the top level of a void handler.
 */
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	if (out_bufsz == 0) {						\
		struct iovec out_iov;					\
		out_iov.iov_base = (void *)arg;				\
		out_iov.iov_len = sizeof(val);				\
		fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1);	\
		return;							\
	}

/*
 * Admin command completion callback (runs when the controller completes the
 * command submitted by cuse_nvme_admin_cmd_execute). Replies to the fuse
 * request with the NVMe status code, plus cdw0 (the ioctl "result" field)
 * and the data buffer for controller-to-host transfers, then frees the ctx.
 */
static void
cuse_nvme_admin_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = arg;
	struct iovec out_iov[2];
	struct spdk_nvme_cpl _cpl;

	if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER ||
	    ctx->data_transfer == SPDK_NVME_DATA_NONE) {
		/* Nothing to copy back to user space beyond the status code. */
		fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);
	} else {
		/* Copy the completion so we can take the address of cdw0. */
		memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));

		out_iov[0].iov_base = &_cpl.cdw0;
		out_iov[0].iov_len = sizeof(_cpl.cdw0);

		if (ctx->data_len > 0) {
			out_iov[1].iov_base = ctx->data;
			out_iov[1].iov_len = ctx->data_len;
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2);
		} else {
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1);
		}
	}

	cuse_io_ctx_free(ctx);
}

/*
 * Runs on the controller's io_msg thread: submit the prepared admin command.
 * On submission failure, reply EINVAL and free the context here (the
 * completion callback will never fire).
 */
static void
cuse_nvme_admin_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;

	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
					   cuse_nvme_admin_cmd_cb, (void *)ctx);
	if (rc < 0) {
		fuse_reply_err(ctx->req, EINVAL);
		cuse_io_ctx_free(ctx);
	}
}

/*
 * Build a cuse_io_ctx from the user's NVME_IOCTL_ADMIN_CMD payload and hand
 * it to the controller's io_msg thread. `data` is the host-to-controller
 * payload (may be NULL); it is copied into a DMA-able buffer.
 * Ownership of ctx transfers to the execute/completion callbacks on success.
 */
static void
cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd,
			 const void *data)
{
	struct cuse_io_ctx *ctx;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	int rv;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode);

	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
	ctx->nvme_cmd.opc = admin_cmd->opcode;
	ctx->nvme_cmd.nsid = admin_cmd->nsid;
	ctx->nvme_cmd.cdw10 = admin_cmd->cdw10;
	ctx->nvme_cmd.cdw11 = admin_cmd->cdw11;
	ctx->nvme_cmd.cdw12 = admin_cmd->cdw12;
	ctx->nvme_cmd.cdw13 = admin_cmd->cdw13;
	ctx->nvme_cmd.cdw14 = admin_cmd->cdw14;
	ctx->nvme_cmd.cdw15 = admin_cmd->cdw15;

	ctx->data_len = admin_cmd->data_len;

	if (ctx->data_len > 0) {
		ctx->data = spdk_malloc(ctx->data_len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->data) {
			SPDK_ERRLOG("Cannot allocate memory for data\n");
			fuse_reply_err(req, ENOMEM);
			free(ctx);
			return;
		}
		if (data != NULL) {
			memcpy(ctx->data, data, ctx->data_len);
		}
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_admin_cmd_execute, ctx);
	if (rv) {
		SPDK_ERRLOG("Cannot send io msg to the controller\n");
		/* rv is a negative errno; fuse_reply_err expects a positive one. */
		fuse_reply_err(req, -rv);
		cuse_io_ctx_free(ctx);
		return;
	}
}

/*
 * NVME_IOCTL_ADMIN_CMD handler. Uses the CUSE retry protocol: first ask the
 * kernel to copy in the nvme_admin_cmd header, then (depending on the
 * opcode's transfer direction) the input data buffer and/or describe the
 * output buffers, and finally submit the command.
 */
static void
cuse_nvme_admin_cmd(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct nvme_admin_cmd *admin_cmd;
	struct iovec in_iov[2], out_iov[2];

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*admin_cmd);
	if (in_bufsz == 0) {
		/* First pass: request the command header from user space. */
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	admin_cmd = (struct nvme_admin_cmd *)in_buf;

	switch (spdk_nvme_opc_get_data_transfer(admin_cmd->opcode)) {
	case SPDK_NVME_DATA_HOST_TO_CONTROLLER:
		if (admin_cmd->addr != 0) {
			in_iov[1].iov_base = (void *)admin_cmd->addr;
			in_iov[1].iov_len = admin_cmd->data_len;
			if (in_bufsz == sizeof(*admin_cmd)) {
				/* Second pass: also request the payload. */
				fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
				return;
			}
			/* Payload follows the header in in_buf. */
			cuse_nvme_admin_cmd_send(req, admin_cmd, in_buf + sizeof(*admin_cmd));
		} else {
			cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);
		}
		return;
	case SPDK_NVME_DATA_NONE:
	case SPDK_NVME_DATA_CONTROLLER_TO_HOST:
		if (out_bufsz == 0) {
			/* Describe the output: the `result` field, plus the data
			 * buffer if the command returns data. */
			out_iov[0].iov_base = &((struct nvme_admin_cmd *)arg)->result;
			out_iov[0].iov_len = sizeof(uint32_t);
			if (admin_cmd->data_len > 0) {
				out_iov[1].iov_base = (void *)admin_cmd->addr;
				out_iov[1].iov_len = admin_cmd->data_len;
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2);
			} else {
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1);
			}
			return;
		}

		cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);

		return;
	case SPDK_NVME_DATA_BIDIRECTIONAL:
		fuse_reply_err(req, EINVAL);
		return;
	}
}

/*
 * Runs on the io_msg thread: perform a full controller reset and reply with
 * the outcome. `arg` is the fuse request itself (no ctx needed).
 */
static void
cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	fuse_req_t req = arg;

	rc = spdk_nvme_ctrlr_reset(ctrlr);
	if (rc) {
		fuse_reply_err(req, rc);
		return;
	}

	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

/*
 * NVME_IOCTL_RESET handler. Only valid on the controller device
 * (nsid == 0); namespace devices reject it.
 */
static void
cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rv;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	if (cuse_device->nsid) {
		SPDK_ERRLOG("Namespace reset not supported\n");
		fuse_reply_err(req, EINVAL);
		return;
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
	if (rv) {
		SPDK_ERRLOG("Cannot send reset\n");
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * Namespace IO requests
 */

/* Write completion: reply with the NVMe status code and free the ctx. */
static void
cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);

	cuse_io_ctx_free(ctx);
}

/*
 * Runs on the io_msg thread: submit the write on the controller's dedicated
 * external-I/O qpair. On submission failure the completion callback never
 * fires, so reply and free here.
 * NOTE(review): rc is negative here but is passed to fuse_reply_err, which
 * expects a positive errno — verify intended behavior.
 */
static void
cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				    ctx->lba, /* LBA start */
				    ctx->lba_count, /* number of LBAs */
				    cuse_nvme_submit_io_write_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

/*
 * NVME_IOCTL_SUBMIT_IO write path: copy the user payload (which follows the
 * nvme_user_io header in in_buf) into a DMA buffer and queue the write.
 * Assumes in_bufsz covers sizeof(*user_io) + data_len — the retry sequence
 * in cuse_nvme_submit_io requested exactly that; TODO confirm the kernel
 * cannot deliver a short buffer here.
 */
static void
cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			  struct fuse_file_info *fi, unsigned flags, uint32_t block_size,
			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_io_ctx *ctx;
	int rc;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	/* nblocks is 0-based per the NVMe spec. */
	ctx->lba_count = user_io->nblocks + 1;
	ctx->data_len = ctx->lba_count * block_size;

	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Write buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len);

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
			      ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send write io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}

/* Read completion: ship the data buffer back to user space, then free. */
static void
cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
	struct iovec iov;

	iov.iov_base = ctx->data;
	iov.iov_len = ctx->data_len;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1);

	cuse_io_ctx_free(ctx);
}

/*
 * Runs on the io_msg thread: submit the read on the external-I/O qpair.
 * Mirrors cuse_nvme_submit_io_write_cb (same NOTE about negative rc).
 */
static void
cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				   ctx->lba, /* LBA start */
				   ctx->lba_count, /* number of LBAs */
				   cuse_nvme_submit_io_read_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("read failed: rc = %d\n", rc);
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

/*
 * NVME_IOCTL_SUBMIT_IO read path: allocate a DMA buffer sized from the
 * request and queue the read; data is returned by the completion callback.
 */
static void
cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			 struct fuse_file_info *fi, unsigned flags, uint32_t block_size,
			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rc;
	struct cuse_io_ctx *ctx;
	const struct nvme_user_io *user_io = in_buf;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	/* nblocks is 0-based per the NVMe spec. */
	ctx->lba_count = user_io->nblocks + 1;

	ctx->data_len = ctx->lba_count * block_size;
	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Read buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send read io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}


/*
 * NVME_IOCTL_SUBMIT_IO dispatcher. Uses the CUSE retry protocol to pull in
 * the nvme_user_io header, then — based on the opcode — either describes the
 * output buffer (read) or requests the payload (write) before dispatching to
 * the corresponding submit helper. Only READ and WRITE opcodes are supported.
 */
static void
cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io;
	struct iovec in_iov[2], out_iov;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	struct spdk_nvme_ns *ns;
	uint32_t block_size;

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*user_io);
	if (in_bufsz == 0) {
		/* First pass: request the nvme_user_io header. */
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	user_io = in_buf;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);

	switch (user_io->opcode) {
	case SPDK_NVME_OPC_READ:
		out_iov.iov_base = (void *)user_io->addr;
		out_iov.iov_len = (user_io->nblocks + 1) * block_size;
		if (out_bufsz == 0) {
			/* Second pass: describe where the read data goes. */
			fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1);
			return;
		}

		cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags,
					 block_size, in_buf, in_bufsz, out_bufsz);
		break;
	case SPDK_NVME_OPC_WRITE:
		in_iov[1].iov_base = (void *)user_io->addr;
		in_iov[1].iov_len = (user_io->nblocks + 1) * block_size;
		if (in_bufsz == sizeof(*user_io)) {
			/* Second pass: also request the write payload. */
			fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
			return;
		}

		cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags,
					  block_size, in_buf, in_bufsz, out_bufsz);
		break;
	default:
		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
		fuse_reply_err(req, EINVAL);
		return;
	}

}

/*****************************************************************************
 * Other namespace IOCTLs
 */

/* BLKGETSIZE64: device size in sectors, as uint64_t (see caller's comment). */
static void
cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
		  struct fuse_file_info *fi, unsigned flags,
		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	uint64_t size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	size = spdk_nvme_ns_get_num_sectors(ns);
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

/* BLKPBSZGET: physical block size (here, the namespace sector size). */
static void
cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int pbsz;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	pbsz = spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
}

/* BLKGETSIZE: device size as a count of 512-byte blocks, returned as long. */
static void
cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	long size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);

	/* return size in 512 bytes blocks */
	size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

/* NVME_IOCTL_ID: the namespace id is returned as the ioctl result itself. */
static void
cuse_getid(fuse_req_t req, int cmd, void *arg,
	   struct fuse_file_info *fi, unsigned flags,
	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
}

/* ioctl dispatcher for the controller character device. */
static void
cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		/* 32-bit compat ioctls are not supported. */
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESET:
		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

/* ioctl dispatcher for namespace character devices. */
static void
cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
	      struct fuse_file_info *fi, unsigned flags,
	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		/* 32-bit compat ioctls are not supported. */
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_SUBMIT_IO:
		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_ID:
		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKPBSZGET:
		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE:
		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE64:
		/* Returns the device size in sectors (returns pointer to uint64_t) */
		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * CUSE threads initialization.
618 */ 619 620 static void cuse_open(fuse_req_t req, struct fuse_file_info *fi) 621 { 622 fuse_reply_open(req, fi); 623 } 624 625 static const struct cuse_lowlevel_ops cuse_ctrlr_clop = { 626 .open = cuse_open, 627 .ioctl = cuse_ctrlr_ioctl, 628 }; 629 630 static const struct cuse_lowlevel_ops cuse_ns_clop = { 631 .open = cuse_open, 632 .ioctl = cuse_ns_ioctl, 633 }; 634 635 static void * 636 cuse_thread(void *arg) 637 { 638 struct cuse_device *cuse_device = arg; 639 char *cuse_argv[] = { "cuse", "-f" }; 640 int cuse_argc = SPDK_COUNTOF(cuse_argv); 641 char devname_arg[128 + 8]; 642 const char *dev_info_argv[] = { devname_arg }; 643 struct cuse_info ci; 644 int multithreaded; 645 int rc; 646 struct fuse_buf buf = { .mem = NULL }; 647 struct pollfd fds; 648 int timeout_msecs = 500; 649 650 spdk_unaffinitize_thread(); 651 652 snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name); 653 654 memset(&ci, 0, sizeof(ci)); 655 ci.dev_info_argc = 1; 656 ci.dev_info_argv = dev_info_argv; 657 ci.flags = CUSE_UNRESTRICTED_IOCTL; 658 659 if (cuse_device->nsid) { 660 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop, 661 &multithreaded, cuse_device); 662 } else { 663 cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop, 664 &multithreaded, cuse_device); 665 } 666 if (!cuse_device->session) { 667 SPDK_ERRLOG("Cannot create cuse session\n"); 668 goto err; 669 } 670 671 SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name); 672 673 /* Receive and process fuse requests */ 674 fds.fd = fuse_session_fd(cuse_device->session); 675 fds.events = POLLIN; 676 while (!fuse_session_exited(cuse_device->session)) { 677 rc = poll(&fds, 1, timeout_msecs); 678 if (rc <= 0) { 679 continue; 680 } 681 rc = fuse_session_receive_buf(cuse_device->session, &buf); 682 if (rc > 0) { 683 fuse_session_process_buf(cuse_device->session, &buf); 684 } 685 } 686 free(buf.mem); 687 
fuse_session_reset(cuse_device->session); 688 cuse_lowlevel_teardown(cuse_device->session); 689 err: 690 pthread_exit(NULL); 691 } 692 693 /***************************************************************************** 694 * CUSE devices management 695 */ 696 697 static int 698 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid) 699 { 700 struct cuse_device *ns_device; 701 int rv; 702 703 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 704 if (ns_device->is_started) { 705 return 0; 706 } 707 708 ns_device->ctrlr = ctrlr_device->ctrlr; 709 ns_device->ctrlr_device = ctrlr_device; 710 ns_device->nsid = nsid; 711 rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d", 712 ctrlr_device->dev_name, ns_device->nsid); 713 if (rv < 0) { 714 SPDK_ERRLOG("Device name too long.\n"); 715 free(ns_device); 716 return -ENAMETOOLONG; 717 } 718 719 rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device); 720 if (rv != 0) { 721 SPDK_ERRLOG("pthread_create failed\n"); 722 return -rv; 723 } 724 725 ns_device->is_started = true; 726 727 return 0; 728 } 729 730 static void 731 cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid) 732 { 733 struct cuse_device *ns_device; 734 735 ns_device = &ctrlr_device->ns_devices[nsid - 1]; 736 if (!ns_device->is_started) { 737 return; 738 } 739 740 fuse_session_exit(ns_device->session); 741 pthread_join(ns_device->tid, NULL); 742 ns_device->is_started = false; 743 } 744 745 static int 746 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index) 747 { 748 int dev_fd; 749 int pid; 750 void *dev_map; 751 struct flock cusedev_lock = { 752 .l_type = F_WRLCK, 753 .l_whence = SEEK_SET, 754 .l_start = 0, 755 .l_len = 0, 756 }; 757 758 snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name), 759 "/tmp/spdk_nvme_cuse_lock_%" PRIu32, index); 760 761 dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 762 if (dev_fd == -1) { 763 SPDK_ERRLOG("could not open %s\n", 
ctrlr_device->lock_name); 764 return -errno; 765 } 766 767 if (ftruncate(dev_fd, sizeof(int)) != 0) { 768 SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name); 769 close(dev_fd); 770 return -errno; 771 } 772 773 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 774 MAP_SHARED, dev_fd, 0); 775 if (dev_map == MAP_FAILED) { 776 SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno); 777 close(dev_fd); 778 return -errno; 779 } 780 781 if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) { 782 pid = *(int *)dev_map; 783 SPDK_ERRLOG("Cannot create lock on device %s, probably" 784 " process %d has claimed it\n", ctrlr_device->lock_name, pid); 785 munmap(dev_map, sizeof(int)); 786 close(dev_fd); 787 /* F_SETLK returns unspecified errnos, normalize them */ 788 return -EACCES; 789 } 790 791 *(int *)dev_map = (int)getpid(); 792 munmap(dev_map, sizeof(int)); 793 ctrlr_device->claim_fd = dev_fd; 794 ctrlr_device->index = index; 795 /* Keep dev_fd open to maintain the lock. 
*/ 796 return 0; 797 } 798 799 static void 800 nvme_cuse_unclaim(struct cuse_device *ctrlr_device) 801 { 802 close(ctrlr_device->claim_fd); 803 ctrlr_device->claim_fd = -1; 804 unlink(ctrlr_device->lock_name); 805 } 806 807 static void 808 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device) 809 { 810 uint32_t i; 811 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 812 813 for (i = 1; i <= num_ns; i++) { 814 cuse_nvme_ns_stop(ctrlr_device, i); 815 } 816 817 fuse_session_exit(ctrlr_device->session); 818 pthread_join(ctrlr_device->tid, NULL); 819 TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq); 820 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 821 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 822 spdk_bit_array_free(&g_ctrlr_started); 823 } 824 nvme_cuse_unclaim(ctrlr_device); 825 free(ctrlr_device->ns_devices); 826 free(ctrlr_device); 827 } 828 829 static int 830 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device) 831 { 832 uint32_t nsid; 833 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr); 834 835 for (nsid = 1; nsid <= num_ns; nsid++) { 836 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) { 837 cuse_nvme_ns_stop(ctrlr_device, nsid); 838 continue; 839 } 840 841 if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) { 842 SPDK_ERRLOG("Cannot start CUSE namespace device."); 843 return -1; 844 } 845 } 846 847 return 0; 848 } 849 850 static int 851 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr) 852 { 853 int rv = 0; 854 struct cuse_device *ctrlr_device; 855 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 856 857 SPDK_NOTICELOG("Creating cuse device for controller\n"); 858 859 if (g_ctrlr_started == NULL) { 860 g_ctrlr_started = spdk_bit_array_create(128); 861 if (g_ctrlr_started == NULL) { 862 SPDK_ERRLOG("Cannot create bit array\n"); 863 return -ENOMEM; 864 } 865 } 866 867 ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device)); 868 if (!ctrlr_device) { 869 
SPDK_ERRLOG("Cannot allocate memory for ctrlr_device."); 870 rv = -ENOMEM; 871 goto err2; 872 } 873 874 ctrlr_device->ctrlr = ctrlr; 875 876 /* Check if device already exists, if not increment index until success */ 877 ctrlr_device->index = 0; 878 while (1) { 879 ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index); 880 if (ctrlr_device->index == UINT32_MAX) { 881 SPDK_ERRLOG("Too many registered controllers\n"); 882 goto err2; 883 } 884 885 if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) { 886 break; 887 } 888 ctrlr_device->index++; 889 } 890 spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index); 891 snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d", 892 ctrlr_device->index); 893 894 rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device); 895 if (rv != 0) { 896 SPDK_ERRLOG("pthread_create failed\n"); 897 rv = -rv; 898 goto err3; 899 } 900 TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq); 901 902 ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device)); 903 /* Start all active namespaces */ 904 if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) { 905 SPDK_ERRLOG("Cannot start CUSE namespace devices."); 906 cuse_nvme_ctrlr_stop(ctrlr_device); 907 rv = -1; 908 goto err3; 909 } 910 911 return 0; 912 913 err3: 914 spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index); 915 err2: 916 free(ctrlr_device); 917 if (spdk_bit_array_count_set(g_ctrlr_started) == 0) { 918 spdk_bit_array_free(&g_ctrlr_started); 919 } 920 return rv; 921 } 922 923 static struct cuse_device * 924 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr) 925 { 926 struct cuse_device *ctrlr_device = NULL; 927 928 TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) { 929 if (ctrlr_device->ctrlr == ctrlr) { 930 break; 931 } 932 } 933 934 return ctrlr_device; 935 } 936 937 static struct cuse_device * 938 nvme_cuse_get_cuse_ns_device(struct 
spdk_nvme_ctrlr *ctrlr, uint32_t nsid) 939 { 940 struct cuse_device *ctrlr_device = NULL; 941 uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 942 943 if (nsid < 1 || nsid > num_ns) { 944 return NULL; 945 } 946 947 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 948 if (!ctrlr_device) { 949 return NULL; 950 } 951 952 if (!ctrlr_device->ns_devices[nsid - 1].is_started) { 953 return NULL; 954 } 955 956 return &ctrlr_device->ns_devices[nsid - 1]; 957 } 958 959 static void 960 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr) 961 { 962 struct cuse_device *ctrlr_device; 963 964 pthread_mutex_lock(&g_cuse_mtx); 965 966 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 967 if (!ctrlr_device) { 968 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 969 pthread_mutex_unlock(&g_cuse_mtx); 970 return; 971 } 972 973 cuse_nvme_ctrlr_stop(ctrlr_device); 974 975 pthread_mutex_unlock(&g_cuse_mtx); 976 } 977 978 static void 979 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr) 980 { 981 struct cuse_device *ctrlr_device; 982 983 pthread_mutex_lock(&g_cuse_mtx); 984 985 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 986 if (!ctrlr_device) { 987 pthread_mutex_unlock(&g_cuse_mtx); 988 return; 989 } 990 991 cuse_nvme_ctrlr_update_namespaces(ctrlr_device); 992 993 pthread_mutex_unlock(&g_cuse_mtx); 994 } 995 996 static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = { 997 .name = "cuse", 998 .stop = nvme_cuse_stop, 999 .update = nvme_cuse_update, 1000 }; 1001 1002 int 1003 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr) 1004 { 1005 int rc; 1006 1007 rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer); 1008 if (rc) { 1009 return rc; 1010 } 1011 1012 pthread_mutex_lock(&g_cuse_mtx); 1013 1014 rc = nvme_cuse_start(ctrlr); 1015 if (rc) { 1016 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1017 } 1018 1019 pthread_mutex_unlock(&g_cuse_mtx); 1020 1021 return rc; 1022 } 1023 1024 int 1025 spdk_nvme_cuse_unregister(struct 
spdk_nvme_ctrlr *ctrlr) 1026 { 1027 struct cuse_device *ctrlr_device; 1028 1029 pthread_mutex_lock(&g_cuse_mtx); 1030 1031 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1032 if (!ctrlr_device) { 1033 SPDK_ERRLOG("Cannot find associated CUSE device\n"); 1034 pthread_mutex_unlock(&g_cuse_mtx); 1035 return -ENODEV; 1036 } 1037 1038 cuse_nvme_ctrlr_stop(ctrlr_device); 1039 1040 pthread_mutex_unlock(&g_cuse_mtx); 1041 1042 nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer); 1043 1044 return 0; 1045 } 1046 1047 void 1048 spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) 1049 { 1050 nvme_cuse_update(ctrlr); 1051 } 1052 1053 int 1054 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size) 1055 { 1056 struct cuse_device *ctrlr_device; 1057 size_t req_len; 1058 1059 pthread_mutex_lock(&g_cuse_mtx); 1060 1061 ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr); 1062 if (!ctrlr_device) { 1063 pthread_mutex_unlock(&g_cuse_mtx); 1064 return -ENODEV; 1065 } 1066 1067 req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name)); 1068 if (*size < req_len) { 1069 *size = req_len; 1070 pthread_mutex_unlock(&g_cuse_mtx); 1071 return -ENOSPC; 1072 } 1073 snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name); 1074 1075 pthread_mutex_unlock(&g_cuse_mtx); 1076 1077 return 0; 1078 } 1079 1080 int 1081 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size) 1082 { 1083 struct cuse_device *ns_device; 1084 size_t req_len; 1085 1086 pthread_mutex_lock(&g_cuse_mtx); 1087 1088 ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid); 1089 if (!ns_device) { 1090 pthread_mutex_unlock(&g_cuse_mtx); 1091 return -ENODEV; 1092 } 1093 1094 req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name)); 1095 if (*size < req_len) { 1096 *size = req_len; 1097 pthread_mutex_unlock(&g_cuse_mtx); 1098 return -ENOSPC; 1099 } 1100 snprintf(name, req_len + 1, "%s", 
ns_device->dev_name); 1101 1102 pthread_mutex_unlock(&g_cuse_mtx); 1103 1104 return 0; 1105 } 1106