/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}
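
/*
 * Common bdev I/O completion callback: translate the bdev_io's NVMe status
 * into the request's completion queue entry. For fused COMPARE + WRITE the
 * bdev layer reports status for both halves, so the first (compare) request
 * is completed here as well, before the write request.
 */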
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int first_sc = 0, first_sct = 0, sc = 0, sct = 0;
	uint32_t cdw0 = 0;
	struct spdk_nvmf_request *first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* fused commands - get status for both operations */
		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* first request should be completed */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}
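
/*
 * Fill the Identify Namespace data structure from the backing bdev's
 * properties: capacity (NSZE/NCAP/NUSE), the single supported LBA format,
 * end-to-end protection settings, atomic/optimal write granularities,
 * reservation capabilities, and the NGUID/EUI64 identifiers taken from the
 * namespace options.
 */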
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_io_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_admin_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}
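
/*
 * The read, write, and compare handlers below share the same shape: decode
 * SLBA/NLB, validate the LBA range against the bdev and the transfer length
 * against the SGL, then submit the corresponding bdev I/O. A -ENOMEM return
 * from the bdev layer is not an error; the request is parked via
 * nvmf_bdev_ctrl_queue_io() and resubmitted through
 * nvmf_ctrlr_process_io_cmd_resubmit() once a bdev_io becomes available.
 */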
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
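
/*
 * COMPARE reuses the READ/WRITE validation above; the bdev layer performs
 * the data comparison and the resulting NVMe status (including any
 * miscompare) is propagated by nvmf_bdev_ctrlr_complete_cmd().
 */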
int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req,
				      struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt,
			write_req->iov, write_req->iovcnt, write_start_lba, write_num_blocks,
			nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
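
/*
 * WRITE ZEROES carries no data from the host, so only the LBA range is
 * validated here; there is no SGL length check.
 */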
int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* As an NVMe-oF controller, SPDK always reports the volatile write
	 * cache bit as 1, so return success for block devices that do not
	 * support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
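
/*
 * Dataset Management (deallocate) fans out one bdev unmap per DSM range.
 * The context below tracks how many unmaps are still outstanding; the NVMe
 * command is completed only once the count drops to zero, and range_index
 * records where to resume if a submission has to be requeued on -ENOMEM.
 */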
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request *req;
	uint32_t count;
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct spdk_io_channel *ch;
	uint32_t range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int sc, sct;
	uint32_t cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
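
/*
 * NVMe passthru: forward the raw NVMe command and its data buffer to the
 * bdev. For I/O passthru, failures other than -ENOMEM are completed as
 * Invalid Opcode with DNR set; admin passthru additionally distinguishes
 * -ENOTSUP from internal errors.
 */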
int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		/* Clearing CDW0 bit 0 indicates that the command was successfully aborted. */
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}
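
/*
 * Build a DIF context for an I/O command on a bdev with metadata. The
 * initial reference tag is taken from the lower 32 bits of the start LBA,
 * and the reference tag / guard checks are enabled only when the bdev has
 * them enabled.
 */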
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0) ? true : false;
}

static void
nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;
	struct iovec *iov;
	int iovcnt;

	if (spdk_unlikely(!success)) {
		int sc = 0, sct = 0;
		uint32_t cdw0 = 0;
		struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	/* backward compatible */
	req->data = req->iov[0].iov_base;

	req->zcopy_bdev_io = bdev_io;	/* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}
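
/*
 * Zero-copy start: ask the bdev for buffers that map the requested LBA
 * range (populated with data for reads). The bdev_io is kept in
 * req->zcopy_bdev_io and is released only by nvmf_bdev_ctrlr_zcopy_end(),
 * which commits or discards the data.
 */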
int
nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (spdk_unlikely(!success)) {
		int sc = 0, sct = 0;
		uint32_t cdw0 = 0;
		struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

void
nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
{
	int rc __attribute__((unused));

	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);

	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
	assert(rc == 0);
}