/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

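/* Return true only if every namespace bdev in the subsystem supports the given bdev I/O type. */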
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

bool
nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
}

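/* Generic bdev I/O completion callback: translate the bdev NVMe status into the request's
 * NVMe completion entry, completing the first command of a fused compare-and-write pair
 * as well when one is pending.
 */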
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc = 0, sct = 0;
	uint32_t			cdw0 = 0;

	if (spdk_unlikely(req->first_fused)) {
		struct spdk_nvmf_request	*first_req = req->first_fused_req;
		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
		int				first_sc = 0, first_sct = 0;

		/* get status for both operations */
		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* first request should be completed */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
		req->first_fused = false;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

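/* Fill the NVMe Identify Namespace data structure from the backing bdev's properties. */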
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	struct spdk_bdev_desc *desc = ns->desc;
	uint64_t num_blocks;
	uint32_t phys_blocklen;
	uint32_t max_copy;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->flbas.msb_format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_desc_get_md_size(desc);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_desc_get_block_size(desc));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_desc_is_dif_head_of_md(desc);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	}
	nsdata->nmic.can_share = 1;
	if (nvmf_ns_is_ptpl_capable(ns)) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));

	/* For now we support just one source range for copy command */
	nsdata->msrc = 0;

	max_copy = spdk_bdev_get_max_copy(bdev);
	if (max_copy == 0 || max_copy > UINT16_MAX) {
		/* Zero means copy size is unlimited */
		nsdata->mcl = UINT16_MAX;
		nsdata->mssrl = UINT16_MAX;
	} else {
		nsdata->mcl = max_copy;
		nsdata->mssrl = max_copy;
	}
}

void
nvmf_bdev_ctrlr_identify_iocs_nvm(struct spdk_nvmf_ns *ns,
				  struct spdk_nvme_nvm_ns_data *nsdata_nvm)
{
	struct spdk_bdev_desc *desc = ns->desc;
	uint8_t _16bpists;
	uint32_t sts, pif;

	if (spdk_bdev_desc_get_dif_type(desc) == SPDK_DIF_DISABLE) {
		return;
	}

	pif = spdk_bdev_desc_get_dif_pi_format(desc);

	/*
	 * 16BPISTS shall be 1 for 32/64b Guard PI.
	 * STCRS shall be 1 if 16BPISTS is 1.
	 * 16 is the minimum value of STS for 32b Guard PI.
	 */
	switch (pif) {
	case SPDK_DIF_PI_FORMAT_16:
		_16bpists = 0;
		sts = 0;
		break;
	case SPDK_DIF_PI_FORMAT_32:
		_16bpists = 1;
		sts = 16;
		break;
	case SPDK_DIF_PI_FORMAT_64:
		_16bpists = 1;
		sts = 0;
		break;
	default:
		SPDK_WARNLOG("PI format %u is not supported\n", pif);
		return;
	}

	/* For 16b Guard PI, Storage Tag is not available because we set STS to 0.
	 * In this case, we do not have to set 16BPISTM to 1. For simplicity,
	 * set 16BPISTM to 0 and set LBSTM to all zeroes.
	 *
	 * We will revisit here when we find any OS uses Storage Tag.
	 */
	nsdata_nvm->lbstm = 0;
	nsdata_nvm->pic._16bpistm = 0;

	nsdata_nvm->pic._16bpists = _16bpists;
	nsdata_nvm->pic.stcrs = 0;
	nsdata_nvm->elbaf[0].sts = sts;
	nsdata_nvm->elbaf[0].pif = pif;
}

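/* Decode the starting LBA and number of blocks (NLB is 0's based in the command)
 * from a read/write-style NVMe command.
 */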
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

static void
nvmf_bdev_ctrlr_get_rw_ext_params(const struct spdk_nvme_cmd *cmd,
				  struct spdk_bdev_ext_io_opts *opts)
{
	/* Get CDW12 values */
	opts->nvme_cdw12.raw = from_le32(&cmd->cdw12);

	/* Get CDW13 values */
	opts->nvme_cdw13.raw = from_le32(&cmd->cdw13);

	/* The bdev layer checks PRACT in CDW12 because it is NVMe specific, but
	 * it does not check the DIF check flags in CDW12 because DIF is not NVMe
	 * specific. Hence, copy the DIF check flags from CDW12 into dif_check_flags_exclude_mask.
	 */
	opts->dif_check_flags_exclude_mask = (~opts->nvme_cdw12.raw) & SPDK_NVME_IO_FLAGS_PRCHK_MASK;
}

static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_io_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_admin_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

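/* Park the request on the bdev's io_wait queue so it is resubmitted once the bdev has
 * free resources again; used when a bdev submission returns -ENOMEM.
 */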
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_bdev_ext_io_opts opts = {
		.size = SPDK_SIZEOF(&opts, accel_sequence),
		.memory_domain = req->memory_domain,
		.memory_domain_ctx = req->memory_domain_ctx,
		.accel_sequence = req->accel_sequence,
	};
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_bdev_ext_io_opts opts = {
		.size = SPDK_SIZEOF(&opts, nvme_cdw13),
		.memory_domain = req->memory_domain,
		.memory_domain_ctx = req->memory_domain_ctx,
		.accel_sequence = req->accel_sequence,
	};
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
	if (spdk_unlikely(max_write_zeroes_size > 0 &&
			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_desc_get_block_size(desc))) {
		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 "Kib\n", max_write_zeroes_size);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(cmd->cdw12_bits.write_zeroes.deac)) {
		SPDK_ERRLOG("Write Zeroes Deallocate is not supported\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* For an NVMe-oF controller, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices that
	 * don't support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

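/* Per-command context for a Dataset Management (deallocate) request that is split into one
 * bdev unmap I/O per range; 'count' tracks the number of outstanding unmaps.
 */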
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	struct spdk_iov_xfer ix;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);

	for (i = unmap_ctx->range_index; i < nr; i++) {
		struct spdk_nvme_dsm_range dsm_range = { 0 };

		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));

		lba = dsm_range.starting_lba;
		lba_count = dsm_range.length;
		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
			SPDK_ERRLOG("invalid unmap size %" PRIu32 " blocks, should not exceed %" PRIu64 " blocks\n",
				    lba_count, (max_discard_size << 10) / block_size);
			response->status.sct = SPDK_NVME_SCT_GENERIC;
			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
			break;
		}

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
	struct spdk_nvme_scc_source_range range = { 0 };
	struct spdk_iov_xfer ix;
	int rc;

	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %" PRIu64 ", NR %u, desc format %u, PRINFOR %u, "
		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
		      sdlba,
		      cmd->cdw12_bits.copy.nr,
		      cmd->cdw12_bits.copy.df,
		      cmd->cdw12_bits.copy.prinfor,
		      cmd->cdw12_bits.copy.dtype,
		      cmd->cdw12_bits.copy.stcw,
		      cmd->cdw12_bits.copy.prinfow,
		      cmd->cdw12_bits.copy.fua,
		      cmd->cdw12_bits.copy.lr);

	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
			  sizeof(struct spdk_nvme_scc_source_range))) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	/*
	 * We support only one source range, and rely on this with the xfer
	 * below.
	 */
	if (cmd->cdw12_bits.copy.nr > 0) {
		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (cmd->cdw12_bits.copy.df != 0) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));

	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
				   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);

	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	if (spdk_unlikely(req->iovcnt > 1)) {
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

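/* Build a DIF context for the command from the bdev descriptor's metadata and protection
 * information settings; returns false if the namespace carries no metadata or the DIF
 * context cannot be initialized.
 */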
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev_desc *desc, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	if (spdk_bdev_desc_get_md_size(desc) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_desc_is_dif_check_enabled(desc, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_desc_is_dif_check_enabled(desc, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_desc_get_block_size(desc),
			       spdk_bdev_desc_get_md_size(desc),
			       spdk_bdev_desc_is_md_interleaved(desc),
			       spdk_bdev_desc_is_dif_head_of_md(desc),
			       spdk_bdev_desc_get_dif_type(desc),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0, &dif_opts);

	return (rc == 0) ? true : false;
}

static void
nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt = 0;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

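/* Start a zero-copy read or write: ask the bdev for its own buffers covering the LBA range
 * and expose them through req->iov; the bdev_io is kept until nvmf_bdev_ctrlr_zcopy_end().
 */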
int
nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

void
nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
{
	int rc __attribute__((unused));

	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);

	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
	assert(rc == 0);
}