xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 7506a7aa53d239f533af3bc768f0d2af55e735fe)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 
39 #include "spdk/bdev.h"
40 #include "spdk/endian.h"
41 #include "spdk/thread.h"
42 #include "spdk/likely.h"
43 #include "spdk/nvme.h"
44 #include "spdk/nvmf_cmd.h"
45 #include "spdk/nvmf_spec.h"
46 #include "spdk/trace.h"
47 #include "spdk/scsi_spec.h"
48 #include "spdk/string.h"
49 #include "spdk/util.h"
50 
51 #include "spdk/log.h"
52 
53 static bool
54 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
55 				      enum spdk_bdev_io_type io_type)
56 {
57 	struct spdk_nvmf_ns *ns;
58 
59 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
60 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
61 		if (ns->bdev == NULL) {
62 			continue;
63 		}
64 
65 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
66 			SPDK_DEBUGLOG(nvmf,
67 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
68 				      spdk_nvmf_subsystem_get_nqn(subsystem),
69 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
70 			return false;
71 		}
72 	}
73 
74 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
75 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
76 	return true;
77 }
78 
79 bool
80 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
81 {
82 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
83 }
84 
85 bool
86 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
87 {
88 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
89 }
90 
91 static void
92 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
93 			     void *cb_arg)
94 {
95 	struct spdk_nvmf_request	*req = cb_arg;
96 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
97 	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
98 	uint32_t			cdw0 = 0;
99 	struct spdk_nvmf_request	*first_req = req->first_fused_req;
100 
101 	if (spdk_unlikely(first_req != NULL)) {
102 		/* fused commands - get status for both operations */
103 		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;
104 
105 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
106 		first_response->cdw0 = cdw0;
107 		first_response->status.sc = first_sc;
108 		first_response->status.sct = first_sct;
109 
110 		/* first request should be completed */
111 		spdk_nvmf_request_complete(first_req);
112 		req->first_fused_req = NULL;
113 	} else {
114 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
115 	}
116 
117 	response->cdw0 = cdw0;
118 	response->status.sc = sc;
119 	response->status.sct = sct;
120 
121 	spdk_nvmf_request_complete(req);
122 	spdk_bdev_free_io(bdev_io);
123 }
124 
125 static void
126 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
127 				   void *cb_arg)
128 {
129 	struct spdk_nvmf_request *req = cb_arg;
130 
131 	if (req->cmd_cb_fn) {
132 		req->cmd_cb_fn(req);
133 	}
134 
135 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
136 }
137 
138 void
139 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
140 			    bool dif_insert_or_strip)
141 {
142 	struct spdk_bdev *bdev = ns->bdev;
143 	uint64_t num_blocks;
144 	uint32_t phys_blocklen;
145 
146 	num_blocks = spdk_bdev_get_num_blocks(bdev);
147 
148 	nsdata->nsze = num_blocks;
149 	nsdata->ncap = num_blocks;
150 	nsdata->nuse = num_blocks;
151 	nsdata->nlbaf = 0;
152 	nsdata->flbas.format = 0;
153 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
154 	if (!dif_insert_or_strip) {
155 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
156 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
157 		if (nsdata->lbaf[0].ms != 0) {
158 			nsdata->flbas.extended = 1;
159 			nsdata->mc.extended = 1;
160 			nsdata->mc.pointer = 0;
161 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
162 
163 			switch (spdk_bdev_get_dif_type(bdev)) {
164 			case SPDK_DIF_TYPE1:
165 				nsdata->dpc.pit1 = 1;
166 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
167 				break;
168 			case SPDK_DIF_TYPE2:
169 				nsdata->dpc.pit2 = 1;
170 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
171 				break;
172 			case SPDK_DIF_TYPE3:
173 				nsdata->dpc.pit3 = 1;
174 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
175 				break;
176 			default:
177 				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
178 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
179 				break;
180 			}
181 		}
182 	} else {
183 		nsdata->lbaf[0].ms = 0;
184 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
185 	}
186 
187 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
188 	assert(phys_blocklen > 0);
189 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
190 	nsdata->nsfeat.optperf = 1;
191 	nsdata->nsfeat.ns_atomic_write_unit = 1;
192 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
193 	nsdata->nawupf = nsdata->npwg;
194 	nsdata->npwa = nsdata->npwg;
195 	nsdata->npdg = nsdata->npwg;
196 	nsdata->npda = nsdata->npwg;
197 
198 	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
199 	nsdata->nmic.can_share = 1;
200 	if (ns->ptpl_file != NULL) {
201 		nsdata->nsrescap.rescap.persist = 1;
202 	}
203 	nsdata->nsrescap.rescap.write_exclusive = 1;
204 	nsdata->nsrescap.rescap.exclusive_access = 1;
205 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
206 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
207 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
208 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
209 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
210 
211 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
212 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
213 
214 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
215 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
216 }
217 
218 static void
219 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
220 			      uint64_t *num_blocks)
221 {
222 	/* SLBA: CDW10 and CDW11 */
223 	*start_lba = from_le64(&cmd->cdw10);
224 
225 	/* NLB: CDW12 bits 15:00, 0's based */
226 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
227 }
228 
/*
 * Return true when the range [io_start_lba, io_start_lba + io_num_blocks)
 * lies entirely within a device of bdev_num_blocks blocks.
 *
 * The comparison is arranged so that no addition is performed, so a range
 * whose end would wrap around 64 bits is rejected.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_num_blocks > bdev_num_blocks) {
		return false;
	}

	/* Safe: the subtraction cannot underflow after the check above. */
	return io_start_lba <= bdev_num_blocks - io_num_blocks;
}
240 
241 static void
242 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
243 {
244 	struct spdk_nvmf_request *req = arg;
245 	int rc;
246 
247 	rc = nvmf_ctrlr_process_io_cmd(req);
248 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
249 		spdk_nvmf_request_complete(req);
250 	}
251 }
252 
253 static void
254 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
255 {
256 	struct spdk_nvmf_request *req = arg;
257 	int rc;
258 
259 	rc = nvmf_ctrlr_process_admin_cmd(req);
260 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
261 		spdk_nvmf_request_complete(req);
262 	}
263 }
264 
265 static void
266 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
267 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
268 {
269 	int rc;
270 
271 	req->bdev_io_wait.bdev = bdev;
272 	req->bdev_io_wait.cb_fn = cb_fn;
273 	req->bdev_io_wait.cb_arg = cb_arg;
274 
275 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
276 	if (rc != 0) {
277 		assert(false);
278 	}
279 	req->qpair->group->stat.pending_bdev_io++;
280 }
281 
282 bool
283 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
284 {
285 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
286 }
287 
288 int
289 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
290 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
291 {
292 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
293 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
294 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
295 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
296 	uint64_t start_lba;
297 	uint64_t num_blocks;
298 	int rc;
299 
300 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
301 
302 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
303 		SPDK_ERRLOG("end of media\n");
304 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
305 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
306 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
307 	}
308 
309 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
310 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
311 			    num_blocks, block_size, req->length);
312 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
313 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
314 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
315 	}
316 
317 	assert(!spdk_nvmf_request_using_zcopy(req));
318 
319 	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
320 				    nvmf_bdev_ctrlr_complete_cmd, req);
321 	if (spdk_unlikely(rc)) {
322 		if (rc == -ENOMEM) {
323 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
324 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
325 		}
326 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
327 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
328 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
329 	}
330 
331 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
332 }
333 
334 int
335 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
336 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
337 {
338 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
339 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
340 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
341 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
342 	uint64_t start_lba;
343 	uint64_t num_blocks;
344 	int rc;
345 
346 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
347 
348 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
349 		SPDK_ERRLOG("end of media\n");
350 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
351 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
352 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
353 	}
354 
355 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
356 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
357 			    num_blocks, block_size, req->length);
358 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
359 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
360 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
361 	}
362 
363 	assert(!spdk_nvmf_request_using_zcopy(req));
364 
365 	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
366 				     nvmf_bdev_ctrlr_complete_cmd, req);
367 	if (spdk_unlikely(rc)) {
368 		if (rc == -ENOMEM) {
369 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
370 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
371 		}
372 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
373 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
374 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
375 	}
376 
377 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
378 }
379 
380 int
381 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
382 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
383 {
384 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
385 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
386 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
387 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
388 	uint64_t start_lba;
389 	uint64_t num_blocks;
390 	int rc;
391 
392 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
393 
394 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
395 		SPDK_ERRLOG("end of media\n");
396 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
397 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
398 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
399 	}
400 
401 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
402 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
403 			    num_blocks, block_size, req->length);
404 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
405 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
406 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
407 	}
408 
409 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
410 				       nvmf_bdev_ctrlr_complete_cmd, req);
411 	if (spdk_unlikely(rc)) {
412 		if (rc == -ENOMEM) {
413 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
414 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
415 		}
416 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
417 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
418 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
419 	}
420 
421 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
422 }
423 
424 int
425 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
426 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
427 {
428 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
429 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
430 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
431 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
432 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
433 	uint64_t write_start_lba, cmp_start_lba;
434 	uint64_t write_num_blocks, cmp_num_blocks;
435 	int rc;
436 
437 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
438 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
439 
440 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
441 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
442 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
443 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
444 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
445 	}
446 
447 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
448 			  write_num_blocks))) {
449 		SPDK_ERRLOG("end of media\n");
450 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
451 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
452 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
453 	}
454 
455 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
456 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
457 			    write_num_blocks, block_size, write_req->length);
458 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
459 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
460 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
461 	}
462 
463 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
464 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
465 	if (spdk_unlikely(rc)) {
466 		if (rc == -ENOMEM) {
467 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
468 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
469 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
470 		}
471 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
472 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
473 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
474 	}
475 
476 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
477 }
478 
479 int
480 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
481 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
482 {
483 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
484 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
485 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
486 	uint64_t start_lba;
487 	uint64_t num_blocks;
488 	int rc;
489 
490 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
491 
492 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
493 		SPDK_ERRLOG("end of media\n");
494 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
495 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
496 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
497 	}
498 
499 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
500 					   nvmf_bdev_ctrlr_complete_cmd, req);
501 	if (spdk_unlikely(rc)) {
502 		if (rc == -ENOMEM) {
503 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
504 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
505 		}
506 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
507 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
508 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
509 	}
510 
511 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
512 }
513 
514 int
515 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
516 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
517 {
518 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
519 	int rc;
520 
521 	/* As for NVMeoF controller, SPDK always set volatile write
522 	 * cache bit to 1, return success for those block devices
523 	 * which can't support FLUSH command.
524 	 */
525 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
526 		response->status.sct = SPDK_NVME_SCT_GENERIC;
527 		response->status.sc = SPDK_NVME_SC_SUCCESS;
528 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
529 	}
530 
531 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
532 				    nvmf_bdev_ctrlr_complete_cmd, req);
533 	if (spdk_unlikely(rc)) {
534 		if (rc == -ENOMEM) {
535 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
536 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
537 		}
538 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
539 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
540 	}
541 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
542 }
543 
/*
 * Per-command bookkeeping for a Dataset Management deallocate that is split
 * into one bdev unmap per DSM range. Allocated by nvmf_bdev_ctrlr_unmap() and
 * freed once `count` drops to zero.
 */
struct nvmf_bdev_ctrlr_unmap {
	/* The NVMe-oF request being serviced. */
	struct spdk_nvmf_request	*req;
	/* Number of operations still pending: incremented before each unmap is
	 * submitted, decremented on completion, submit error, or dequeue.
	 */
	uint32_t			count;
	/* bdev descriptor, bdev and I/O channel saved for resubmission. */
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	/* Index of the next DSM range to submit. */
	uint32_t			range_index;
};
552 
553 static void
554 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
555 			  void *cb_arg)
556 {
557 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
558 	struct spdk_nvmf_request	*req = unmap_ctx->req;
559 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
560 	int				sc, sct;
561 	uint32_t			cdw0;
562 
563 	unmap_ctx->count--;
564 
565 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
566 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
567 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
568 		response->cdw0 = cdw0;
569 		response->status.sc = sc;
570 		response->status.sct = sct;
571 	}
572 
573 	if (unmap_ctx->count == 0) {
574 		spdk_nvmf_request_complete(req);
575 		free(unmap_ctx);
576 	}
577 	spdk_bdev_free_io(bdev_io);
578 }
579 
580 static int
581 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
582 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
583 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
584 static void
585 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
586 {
587 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
588 	struct spdk_nvmf_request *req = unmap_ctx->req;
589 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
590 	struct spdk_bdev *bdev = unmap_ctx->bdev;
591 	struct spdk_io_channel *ch = unmap_ctx->ch;
592 
593 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
594 }
595 
/*
 * Submit one bdev unmap per DSM range described in req->data.
 *
 * unmap_ctx - NULL on the first submission (a context is allocated and the
 *             response is initialised to GENERIC/SUCCESS); non-NULL when
 *             resuming after an -ENOMEM wait, in which case submission
 *             continues from unmap_ctx->range_index.
 *
 * Returns SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE when nothing is pending
 * (validation failure, allocation failure, or no unmap outstanding; the
 * context, if any, has been freed), otherwise
 * SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS, in which case the request is
 * completed from nvmf_bdev_ctrlr_unmap_cpl() or after a resubmission.
 */
static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	/* The NR field is 0's based; the data buffer must hold all `nr` ranges. */
	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		/* First submission: set up the tracking context. */
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		/* Resubmission: drop the reference held while the request was queued. */
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		/* Count the unmap before submitting it. */
		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		/* Range i was submitted; a later resubmission starts after it. */
		unmap_ctx->range_index++;
	}

	/* Nothing pending: the caller completes the request. */
	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
666 
667 int
668 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
669 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
670 {
671 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
672 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
673 
674 	if (cmd->cdw11_bits.dsm.ad) {
675 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
676 	}
677 
678 	response->status.sct = SPDK_NVME_SCT_GENERIC;
679 	response->status.sc = SPDK_NVME_SC_SUCCESS;
680 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
681 }
682 
683 int
684 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
685 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
686 {
687 	int rc;
688 
689 	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
690 					nvmf_bdev_ctrlr_complete_cmd, req);
691 	if (spdk_unlikely(rc)) {
692 		if (rc == -ENOMEM) {
693 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
694 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
695 		}
696 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
697 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
698 		req->rsp->nvme_cpl.status.dnr = 1;
699 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
700 	}
701 
702 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
703 }
704 
705 int
706 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
707 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
708 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
709 {
710 	int rc;
711 
712 	req->cmd_cb_fn = cb_fn;
713 
714 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
715 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
716 	if (spdk_unlikely(rc)) {
717 		if (rc == -ENOMEM) {
718 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
719 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
720 		}
721 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
722 		if (rc == -ENOTSUP) {
723 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
724 		} else {
725 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
726 		}
727 
728 		req->rsp->nvme_cpl.status.dnr = 1;
729 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
730 	}
731 
732 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
733 }
734 
735 static void
736 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
737 {
738 	struct spdk_nvmf_request *req = cb_arg;
739 
740 	if (success) {
741 		req->rsp->nvme_cpl.cdw0 &= ~1U;
742 	}
743 
744 	spdk_nvmf_request_complete(req);
745 	spdk_bdev_free_io(bdev_io);
746 }
747 
748 int
749 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
750 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
751 			       struct spdk_nvmf_request *req_to_abort)
752 {
753 	int rc;
754 
755 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
756 
757 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
758 	if (spdk_likely(rc == 0)) {
759 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
760 	} else if (rc == -ENOMEM) {
761 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
762 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
763 	} else {
764 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
765 	}
766 }
767 
768 bool
769 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
770 			    struct spdk_dif_ctx *dif_ctx)
771 {
772 	uint32_t init_ref_tag, dif_check_flags = 0;
773 	int rc;
774 
775 	if (spdk_bdev_get_md_size(bdev) == 0) {
776 		return false;
777 	}
778 
779 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
780 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
781 
782 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
783 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
784 	}
785 
786 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
787 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
788 	}
789 
790 	rc = spdk_dif_ctx_init(dif_ctx,
791 			       spdk_bdev_get_block_size(bdev),
792 			       spdk_bdev_get_md_size(bdev),
793 			       spdk_bdev_is_md_interleaved(bdev),
794 			       spdk_bdev_is_dif_head_of_md(bdev),
795 			       spdk_bdev_get_dif_type(bdev),
796 			       dif_check_flags,
797 			       init_ref_tag, 0, 0, 0, 0);
798 
799 	return (rc == 0) ? true : false;
800 }
801 
802 static void
803 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
804 				     void *cb_arg)
805 {
806 	struct spdk_nvmf_request	*req = cb_arg;
807 	struct iovec *iov;
808 	int iovcnt;
809 
810 	if (spdk_unlikely(!success)) {
811 		int                     sc = 0, sct = 0;
812 		uint32_t                cdw0 = 0;
813 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
814 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
815 
816 		response->cdw0 = cdw0;
817 		response->status.sc = sc;
818 		response->status.sct = sct;
819 
820 		spdk_bdev_free_io(bdev_io);
821 		spdk_nvmf_request_complete(req);
822 		return;
823 	}
824 
825 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
826 
827 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
828 	assert(iovcnt > 0);
829 
830 	req->iovcnt = iovcnt;
831 
832 	assert(req->iov == iov);
833 
834 	/* backward compatible */
835 	req->data = req->iov[0].iov_base;
836 
837 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
838 
839 	spdk_nvmf_request_complete(req);
840 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
841 }
842 
843 int
844 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
845 			    struct spdk_bdev_desc *desc,
846 			    struct spdk_io_channel *ch,
847 			    struct spdk_nvmf_request *req)
848 {
849 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
850 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
851 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
852 	uint64_t start_lba;
853 	uint64_t num_blocks;
854 	int rc;
855 
856 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
857 
858 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
859 		SPDK_ERRLOG("end of media\n");
860 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
861 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
862 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
863 	}
864 
865 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
866 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
867 			    num_blocks, block_size, req->length);
868 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
869 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
870 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
871 	}
872 
873 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;
874 
875 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
876 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
877 	if (spdk_unlikely(rc != 0)) {
878 		if (rc == -ENOMEM) {
879 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
880 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
881 		}
882 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
883 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
884 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
885 	}
886 
887 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
888 }
889 
890 static void
891 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
892 				   void *cb_arg)
893 {
894 	struct spdk_nvmf_request	*req = cb_arg;
895 
896 	if (spdk_unlikely(!success)) {
897 		int                     sc = 0, sct = 0;
898 		uint32_t                cdw0 = 0;
899 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
900 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
901 
902 		response->cdw0 = cdw0;
903 		response->status.sc = sc;
904 		response->status.sct = sct;
905 	}
906 
907 	spdk_bdev_free_io(bdev_io);
908 	req->zcopy_bdev_io = NULL;
909 	spdk_nvmf_request_complete(req);
910 }
911 
912 void
913 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
914 {
915 	int rc __attribute__((unused));
916 
917 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
918 
919 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
920 	assert(rc == 0);
921 }
922