xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 95d6c9fac17572b107042103439aafd696d60b0e)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 
11 #include "spdk/bdev.h"
12 #include "spdk/endian.h"
13 #include "spdk/thread.h"
14 #include "spdk/likely.h"
15 #include "spdk/nvme.h"
16 #include "spdk/nvmf_cmd.h"
17 #include "spdk/nvmf_spec.h"
18 #include "spdk/trace.h"
19 #include "spdk/scsi_spec.h"
20 #include "spdk/string.h"
21 #include "spdk/util.h"
22 
23 #include "spdk/log.h"
24 
25 static bool
26 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
27 				      enum spdk_bdev_io_type io_type)
28 {
29 	struct spdk_nvmf_ns *ns;
30 
31 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
32 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
33 		if (ns->bdev == NULL) {
34 			continue;
35 		}
36 
37 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
38 			SPDK_DEBUGLOG(nvmf,
39 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
40 				      spdk_nvmf_subsystem_get_nqn(subsystem),
41 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
42 			return false;
43 		}
44 	}
45 
46 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
47 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
48 	return true;
49 }
50 
51 bool
52 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
53 {
54 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
55 }
56 
57 bool
58 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
59 {
60 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
61 }
62 
63 bool
64 nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
65 {
66 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
67 }
68 
69 static void
70 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
71 			     void *cb_arg)
72 {
73 	struct spdk_nvmf_request	*req = cb_arg;
74 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
75 	int				sc = 0, sct = 0;
76 	uint32_t			cdw0 = 0;
77 
78 	if (spdk_unlikely(req->first_fused)) {
79 		struct spdk_nvmf_request	*first_req = req->first_fused_req;
80 		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
81 		int				first_sc = 0, first_sct = 0;
82 
83 		/* get status for both operations */
84 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
85 		first_response->cdw0 = cdw0;
86 		first_response->status.sc = first_sc;
87 		first_response->status.sct = first_sct;
88 
89 		/* first request should be completed */
90 		spdk_nvmf_request_complete(first_req);
91 		req->first_fused_req = NULL;
92 		req->first_fused = false;
93 	} else {
94 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
95 	}
96 
97 	response->cdw0 = cdw0;
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 static void
106 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
107 				   void *cb_arg)
108 {
109 	struct spdk_nvmf_request *req = cb_arg;
110 
111 	if (req->cmd_cb_fn) {
112 		req->cmd_cb_fn(req);
113 	}
114 
115 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
116 }
117 
118 void
119 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
120 			    bool dif_insert_or_strip)
121 {
122 	struct spdk_bdev *bdev = ns->bdev;
123 	uint64_t num_blocks;
124 	uint32_t phys_blocklen;
125 	uint32_t max_copy;
126 
127 	num_blocks = spdk_bdev_get_num_blocks(bdev);
128 
129 	nsdata->nsze = num_blocks;
130 	nsdata->ncap = num_blocks;
131 	nsdata->nuse = num_blocks;
132 	nsdata->nlbaf = 0;
133 	nsdata->flbas.format = 0;
134 	nsdata->flbas.msb_format = 0;
135 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
136 	if (!dif_insert_or_strip) {
137 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
138 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
139 		if (nsdata->lbaf[0].ms != 0) {
140 			nsdata->flbas.extended = 1;
141 			nsdata->mc.extended = 1;
142 			nsdata->mc.pointer = 0;
143 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
144 			/* NVMf library doesn't process PRACT and PRCHK flags, we
145 			 * leave the use of extended LBA buffer to users.
146 			 */
147 			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
148 		}
149 	} else {
150 		nsdata->lbaf[0].ms = 0;
151 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
152 	}
153 
154 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
155 	assert(phys_blocklen > 0);
156 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
157 	nsdata->nsfeat.optperf = 1;
158 	nsdata->nsfeat.ns_atomic_write_unit = 1;
159 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
160 	nsdata->nawupf = nsdata->npwg;
161 	nsdata->npwa = nsdata->npwg;
162 	nsdata->npdg = nsdata->npwg;
163 	nsdata->npda = nsdata->npwg;
164 
165 	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
166 		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
167 	}
168 	nsdata->nmic.can_share = 1;
169 	if (nvmf_ns_is_ptpl_capable(ns)) {
170 		nsdata->nsrescap.rescap.persist = 1;
171 	}
172 	nsdata->nsrescap.rescap.write_exclusive = 1;
173 	nsdata->nsrescap.rescap.exclusive_access = 1;
174 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
175 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
176 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
177 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
178 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
179 
180 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
181 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
182 
183 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
184 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
185 
186 	/* For now we support just one source range for copy command */
187 	nsdata->msrc = 0;
188 
189 	max_copy = spdk_bdev_get_max_copy(bdev);
190 	if (max_copy == 0 || max_copy > UINT16_MAX) {
191 		/* Zero means copy size is unlimited */
192 		nsdata->mcl = UINT16_MAX;
193 		nsdata->mssrl = UINT16_MAX;
194 	} else {
195 		nsdata->mcl = max_copy;
196 		nsdata->mssrl = max_copy;
197 	}
198 }
199 
200 void
201 nvmf_bdev_ctrlr_identify_iocs_nvm(struct spdk_nvmf_ns *ns,
202 				  struct spdk_nvme_nvm_ns_data *nsdata_nvm)
203 {
204 	struct spdk_bdev *bdev = ns->bdev;
205 	uint8_t _16bpists;
206 	uint32_t sts, pif;
207 
208 	if (spdk_bdev_get_dif_type(bdev) == SPDK_DIF_DISABLE) {
209 		return;
210 	}
211 
212 	pif = spdk_bdev_get_dif_pi_format(bdev);
213 
214 	/*
215 	 * 16BPISTS shall be 1 for 32/64b Guard PI.
216 	 * STCRS shall be 1 if 16BPISTS is 1.
217 	 * 16 is the minimum value of STS for 32b Guard PI.
218 	 */
219 	switch (pif) {
220 	case SPDK_DIF_PI_FORMAT_16:
221 		_16bpists = 0;
222 		sts = 0;
223 		break;
224 	case SPDK_DIF_PI_FORMAT_32:
225 		_16bpists = 1;
226 		sts = 16;
227 		break;
228 	case SPDK_DIF_PI_FORMAT_64:
229 		_16bpists = 1;
230 		sts = 0;
231 		break;
232 	default:
233 		SPDK_WARNLOG("PI format %u is not supported\n", pif);
234 		return;
235 	}
236 
237 	/* For 16b Guard PI, Storage Tag is not available because we set STS to 0.
238 	 * In this case, we do not have to set 16BPISTM to 1. For simplicity,
239 	 * set 16BPISTM to 0 and set LBSTM to all zeroes.
240 	 *
241 	 * We will revisit here when we find any OS uses Storage Tag.
242 	 */
243 	nsdata_nvm->lbstm = 0;
244 	nsdata_nvm->pic._16bpistm = 0;
245 
246 	nsdata_nvm->pic._16bpists = _16bpists;
247 	nsdata_nvm->pic.stcrs = 0;
248 	nsdata_nvm->elbaf[0].sts = sts;
249 	nsdata_nvm->elbaf[0].pif = pif;
250 }
251 
252 static void
253 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
254 			      uint64_t *num_blocks)
255 {
256 	/* SLBA: CDW10 and CDW11 */
257 	*start_lba = from_le64(&cmd->cdw10);
258 
259 	/* NLB: CDW12 bits 15:00, 0's based */
260 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
261 }
262 
263 static void
264 nvmf_bdev_ctrlr_get_rw_ext_params(const struct spdk_nvme_cmd *cmd,
265 				  struct spdk_bdev_ext_io_opts *opts)
266 {
267 	/* Get CDW12 values */
268 	opts->nvme_cdw12.raw = from_le32(&cmd->cdw12);
269 
270 	/* Get CDW13 values */
271 	opts->nvme_cdw13.raw = from_le32(&cmd->cdw13);
272 }
273 
/*
 * Check that the block range [io_start_lba, io_start_lba + io_num_blocks)
 * lies within a device of bdev_num_blocks blocks.
 *
 * Returns false when the range extends past the end of the device or when
 * the end of the range wraps around 64 bits, true otherwise.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	uint64_t io_end_lba = io_start_lba + io_num_blocks;

	/* A sum smaller than its first operand means the addition overflowed */
	if (io_end_lba < io_start_lba) {
		return false;
	}

	return io_end_lba <= bdev_num_blocks;
}
285 
286 static void
287 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
288 {
289 	struct spdk_nvmf_request *req = arg;
290 	int rc;
291 
292 	rc = nvmf_ctrlr_process_io_cmd(req);
293 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
294 		spdk_nvmf_request_complete(req);
295 	}
296 }
297 
298 static void
299 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
300 {
301 	struct spdk_nvmf_request *req = arg;
302 	int rc;
303 
304 	rc = nvmf_ctrlr_process_admin_cmd(req);
305 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
306 		spdk_nvmf_request_complete(req);
307 	}
308 }
309 
310 static void
311 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
312 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
313 {
314 	int rc;
315 
316 	req->bdev_io_wait.bdev = bdev;
317 	req->bdev_io_wait.cb_fn = cb_fn;
318 	req->bdev_io_wait.cb_arg = cb_arg;
319 
320 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
321 	if (rc != 0) {
322 		assert(false);
323 	}
324 	req->qpair->group->stat.pending_bdev_io++;
325 }
326 
327 bool
328 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
329 {
330 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
331 }
332 
333 int
334 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
335 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
336 {
337 	struct spdk_bdev_ext_io_opts opts = {
338 		.size = SPDK_SIZEOF(&opts, accel_sequence),
339 		.memory_domain = req->memory_domain,
340 		.memory_domain_ctx = req->memory_domain_ctx,
341 		.accel_sequence = req->accel_sequence,
342 	};
343 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
344 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
345 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
346 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
347 	uint64_t start_lba;
348 	uint64_t num_blocks;
349 	int rc;
350 
351 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
352 
353 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
354 		SPDK_ERRLOG("end of media\n");
355 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
356 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
357 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
358 	}
359 
360 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
361 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
362 			    num_blocks, block_size, req->length);
363 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
364 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
365 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
366 	}
367 
368 	assert(!spdk_nvmf_request_using_zcopy(req));
369 
370 	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
371 					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
372 	if (spdk_unlikely(rc)) {
373 		if (rc == -ENOMEM) {
374 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
375 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
376 		}
377 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
378 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
379 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
380 	}
381 
382 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
383 }
384 
385 int
386 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
387 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
388 {
389 	struct spdk_bdev_ext_io_opts opts = {
390 		.size = SPDK_SIZEOF(&opts, nvme_cdw13),
391 		.memory_domain = req->memory_domain,
392 		.memory_domain_ctx = req->memory_domain_ctx,
393 		.accel_sequence = req->accel_sequence,
394 	};
395 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
396 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
397 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
398 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
399 	uint64_t start_lba;
400 	uint64_t num_blocks;
401 	int rc;
402 
403 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
404 	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);
405 
406 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
407 		SPDK_ERRLOG("end of media\n");
408 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
409 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
410 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
411 	}
412 
413 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
414 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
415 			    num_blocks, block_size, req->length);
416 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
417 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
418 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
419 	}
420 
421 	assert(!spdk_nvmf_request_using_zcopy(req));
422 
423 	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
424 					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
425 	if (spdk_unlikely(rc)) {
426 		if (rc == -ENOMEM) {
427 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
428 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
429 		}
430 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
431 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
432 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
433 	}
434 
435 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
436 }
437 
438 int
439 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
440 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
441 {
442 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
443 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
444 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
445 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
446 	uint64_t start_lba;
447 	uint64_t num_blocks;
448 	int rc;
449 
450 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
451 
452 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
453 		SPDK_ERRLOG("end of media\n");
454 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
455 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
456 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
457 	}
458 
459 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
460 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
461 			    num_blocks, block_size, req->length);
462 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
463 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
464 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
465 	}
466 
467 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
468 				       nvmf_bdev_ctrlr_complete_cmd, req);
469 	if (spdk_unlikely(rc)) {
470 		if (rc == -ENOMEM) {
471 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
472 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
473 		}
474 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
475 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
476 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
477 	}
478 
479 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
480 }
481 
482 int
483 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
484 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
485 {
486 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
487 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
488 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
489 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
490 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
491 	uint64_t write_start_lba, cmp_start_lba;
492 	uint64_t write_num_blocks, cmp_num_blocks;
493 	int rc;
494 
495 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
496 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
497 
498 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
499 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
500 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
501 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
502 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
503 	}
504 
505 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
506 			  write_num_blocks))) {
507 		SPDK_ERRLOG("end of media\n");
508 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
509 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
510 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
511 	}
512 
513 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
514 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
515 			    write_num_blocks, block_size, write_req->length);
516 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
517 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
518 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
519 	}
520 
521 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
522 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
523 	if (spdk_unlikely(rc)) {
524 		if (rc == -ENOMEM) {
525 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
526 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
527 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
528 		}
529 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
530 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
531 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
532 	}
533 
534 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
535 }
536 
537 int
538 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
539 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
540 {
541 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
542 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
543 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
544 	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
545 	uint64_t start_lba;
546 	uint64_t num_blocks;
547 	int rc;
548 
549 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
550 	if (spdk_unlikely(max_write_zeroes_size > 0 &&
551 			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_get_block_size(bdev))) {
552 		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 "Kib\n", max_write_zeroes_size);
553 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
554 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
555 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
556 	}
557 
558 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
559 		SPDK_ERRLOG("end of media\n");
560 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
561 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
562 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
563 	}
564 
565 	if (spdk_unlikely(cmd->cdw12_bits.write_zeroes.deac)) {
566 		SPDK_ERRLOG("Write Zeroes Deallocate is not supported\n");
567 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
568 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
569 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
570 	}
571 
572 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
573 					   nvmf_bdev_ctrlr_complete_cmd, req);
574 	if (spdk_unlikely(rc)) {
575 		if (rc == -ENOMEM) {
576 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
577 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
578 		}
579 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
580 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
581 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
582 	}
583 
584 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
585 }
586 
587 int
588 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
589 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
590 {
591 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
592 	int rc;
593 
594 	/* As for NVMeoF controller, SPDK always set volatile write
595 	 * cache bit to 1, return success for those block devices
596 	 * which can't support FLUSH command.
597 	 */
598 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
599 		response->status.sct = SPDK_NVME_SCT_GENERIC;
600 		response->status.sc = SPDK_NVME_SC_SUCCESS;
601 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
602 	}
603 
604 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
605 				    nvmf_bdev_ctrlr_complete_cmd, req);
606 	if (spdk_unlikely(rc)) {
607 		if (rc == -ENOMEM) {
608 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
609 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
610 		}
611 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
612 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
613 	}
614 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
615 }
616 
/*
 * State shared by all bdev unmap operations issued on behalf of one
 * Dataset Management (deallocate) request.
 */
struct nvmf_bdev_ctrlr_unmap {
	/* Request being served; completed once count drops to zero */
	struct spdk_nvmf_request	*req;
	/* References held on this context: one per submitted unmap that has not
	 * completed yet, plus one while the context is queued for resubmission
	 */
	uint32_t			count;
	/* Submission parameters saved so a queued context can be resubmitted */
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	/* Number of ranges submitted successfully so far; submission resumes here */
	uint32_t			range_index;
};
625 
626 static void
627 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
628 			  void *cb_arg)
629 {
630 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
631 	struct spdk_nvmf_request	*req = unmap_ctx->req;
632 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
633 	int				sc, sct;
634 	uint32_t			cdw0;
635 
636 	unmap_ctx->count--;
637 
638 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
639 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
640 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
641 		response->cdw0 = cdw0;
642 		response->status.sc = sc;
643 		response->status.sct = sct;
644 	}
645 
646 	if (unmap_ctx->count == 0) {
647 		spdk_nvmf_request_complete(req);
648 		free(unmap_ctx);
649 	}
650 	spdk_bdev_free_io(bdev_io);
651 }
652 
653 static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
654 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
655 				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
656 static void
657 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
658 {
659 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
660 	struct spdk_nvmf_request *req = unmap_ctx->req;
661 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
662 	struct spdk_bdev *bdev = unmap_ctx->bdev;
663 	struct spdk_io_channel *ch = unmap_ctx->ch;
664 
665 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
666 }
667 
668 static int
669 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
670 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
671 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
672 {
673 	uint16_t nr, i;
674 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
675 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
676 	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
677 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
678 	struct spdk_iov_xfer ix;
679 	uint64_t lba;
680 	uint32_t lba_count;
681 	int rc;
682 
683 	nr = cmd->cdw10_bits.dsm.nr + 1;
684 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
685 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
686 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
687 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
688 	}
689 
690 	if (unmap_ctx == NULL) {
691 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
692 		if (!unmap_ctx) {
693 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
694 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
695 		}
696 
697 		unmap_ctx->req = req;
698 		unmap_ctx->desc = desc;
699 		unmap_ctx->ch = ch;
700 		unmap_ctx->bdev = bdev;
701 
702 		response->status.sct = SPDK_NVME_SCT_GENERIC;
703 		response->status.sc = SPDK_NVME_SC_SUCCESS;
704 	} else {
705 		unmap_ctx->count--;	/* dequeued */
706 	}
707 
708 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
709 
710 	for (i = unmap_ctx->range_index; i < nr; i++) {
711 		struct spdk_nvme_dsm_range dsm_range = { 0 };
712 
713 		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));
714 
715 		lba = dsm_range.starting_lba;
716 		lba_count = dsm_range.length;
717 		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
718 			SPDK_ERRLOG("invalid unmap size %" PRIu32 " blocks, should not exceed %" PRIu64 " blocks\n",
719 				    lba_count, max_discard_size << 1);
720 			response->status.sct = SPDK_NVME_SCT_GENERIC;
721 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
722 			break;
723 		}
724 
725 		unmap_ctx->count++;
726 
727 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
728 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
729 		if (rc) {
730 			if (rc == -ENOMEM) {
731 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
732 				/* Unmap was not yet submitted to bdev */
733 				/* unmap_ctx->count will be decremented when the request is dequeued */
734 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
735 			}
736 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
737 			unmap_ctx->count--;
738 			/* We can't return here - we may have to wait for any other
739 				* unmaps already sent to complete */
740 			break;
741 		}
742 		unmap_ctx->range_index++;
743 	}
744 
745 	if (unmap_ctx->count == 0) {
746 		free(unmap_ctx);
747 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
748 	}
749 
750 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
751 }
752 
753 int
754 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
755 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
756 {
757 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
758 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
759 
760 	if (cmd->cdw11_bits.dsm.ad) {
761 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
762 	}
763 
764 	response->status.sct = SPDK_NVME_SCT_GENERIC;
765 	response->status.sc = SPDK_NVME_SC_SUCCESS;
766 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
767 }
768 
769 int
770 nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
771 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
772 {
773 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
774 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
775 	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
776 	struct spdk_nvme_scc_source_range range = { 0 };
777 	struct spdk_iov_xfer ix;
778 	int rc;
779 
780 	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %lu, NR %u, desc format %u, PRINFOR %u, "
781 		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
782 		      sdlba,
783 		      cmd->cdw12_bits.copy.nr,
784 		      cmd->cdw12_bits.copy.df,
785 		      cmd->cdw12_bits.copy.prinfor,
786 		      cmd->cdw12_bits.copy.dtype,
787 		      cmd->cdw12_bits.copy.stcw,
788 		      cmd->cdw12_bits.copy.prinfow,
789 		      cmd->cdw12_bits.copy.fua,
790 		      cmd->cdw12_bits.copy.lr);
791 
792 	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
793 			  sizeof(struct spdk_nvme_scc_source_range))) {
794 		response->status.sct = SPDK_NVME_SCT_GENERIC;
795 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
796 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
797 	}
798 
799 	/*
800 	 * We support only one source range, and rely on this with the xfer
801 	 * below.
802 	 */
803 	if (cmd->cdw12_bits.copy.nr > 0) {
804 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
805 		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
806 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
807 	}
808 
809 	if (cmd->cdw12_bits.copy.df != 0) {
810 		response->status.sct = SPDK_NVME_SCT_GENERIC;
811 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
812 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
813 	}
814 
815 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
816 	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));
817 
818 	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
819 				   nvmf_bdev_ctrlr_complete_cmd, req);
820 	if (spdk_unlikely(rc)) {
821 		if (rc == -ENOMEM) {
822 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
823 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
824 		}
825 
826 		response->status.sct = SPDK_NVME_SCT_GENERIC;
827 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
828 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
829 	}
830 
831 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
832 }
833 
834 int
835 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
836 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
837 {
838 	int rc;
839 
840 	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
841 					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);
842 
843 	if (spdk_unlikely(rc)) {
844 		if (rc == -ENOMEM) {
845 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
846 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
847 		}
848 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
849 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
850 		req->rsp->nvme_cpl.status.dnr = 1;
851 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
852 	}
853 
854 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
855 }
856 
857 int
858 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
859 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
860 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
861 {
862 	int rc;
863 
864 	if (spdk_unlikely(req->iovcnt > 1)) {
865 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
866 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
867 		req->rsp->nvme_cpl.status.dnr = 1;
868 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
869 	}
870 
871 	req->cmd_cb_fn = cb_fn;
872 
873 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
874 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
875 	if (spdk_unlikely(rc)) {
876 		if (rc == -ENOMEM) {
877 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
878 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
879 		}
880 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
881 		if (rc == -ENOTSUP) {
882 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
883 		} else {
884 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
885 		}
886 
887 		req->rsp->nvme_cpl.status.dnr = 1;
888 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
889 	}
890 
891 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
892 }
893 
894 static void
895 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
896 {
897 	struct spdk_nvmf_request *req = cb_arg;
898 
899 	if (success) {
900 		req->rsp->nvme_cpl.cdw0 &= ~1U;
901 	}
902 
903 	spdk_nvmf_request_complete(req);
904 	spdk_bdev_free_io(bdev_io);
905 }
906 
907 int
908 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
909 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
910 			       struct spdk_nvmf_request *req_to_abort)
911 {
912 	int rc;
913 
914 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
915 
916 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
917 	if (spdk_likely(rc == 0)) {
918 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
919 	} else if (rc == -ENOMEM) {
920 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
921 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
922 	} else {
923 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
924 	}
925 }
926 
927 bool
928 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
929 			    struct spdk_dif_ctx *dif_ctx)
930 {
931 	uint32_t init_ref_tag, dif_check_flags = 0;
932 	int rc;
933 	struct spdk_dif_ctx_init_ext_opts dif_opts;
934 
935 	if (spdk_bdev_get_md_size(bdev) == 0) {
936 		return false;
937 	}
938 
939 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
940 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
941 
942 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
943 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
944 	}
945 
946 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
947 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
948 	}
949 
950 	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
951 	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
952 	rc = spdk_dif_ctx_init(dif_ctx,
953 			       spdk_bdev_get_block_size(bdev),
954 			       spdk_bdev_get_md_size(bdev),
955 			       spdk_bdev_is_md_interleaved(bdev),
956 			       spdk_bdev_is_dif_head_of_md(bdev),
957 			       spdk_bdev_get_dif_type(bdev),
958 			       dif_check_flags,
959 			       init_ref_tag, 0, 0, 0, 0, &dif_opts);
960 
961 	return (rc == 0) ? true : false;
962 }
963 
964 static void
965 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
966 				     void *cb_arg)
967 {
968 	struct spdk_nvmf_request	*req = cb_arg;
969 	struct iovec *iov;
970 	int iovcnt = 0;
971 
972 	if (spdk_unlikely(!success)) {
973 		int                     sc = 0, sct = 0;
974 		uint32_t                cdw0 = 0;
975 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
976 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
977 
978 		response->cdw0 = cdw0;
979 		response->status.sc = sc;
980 		response->status.sct = sct;
981 
982 		spdk_bdev_free_io(bdev_io);
983 		spdk_nvmf_request_complete(req);
984 		return;
985 	}
986 
987 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
988 
989 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
990 	assert(iovcnt > 0);
991 
992 	req->iovcnt = iovcnt;
993 
994 	assert(req->iov == iov);
995 
996 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
997 
998 	spdk_nvmf_request_complete(req);
999 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
1000 }
1001 
1002 int
1003 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
1004 			    struct spdk_bdev_desc *desc,
1005 			    struct spdk_io_channel *ch,
1006 			    struct spdk_nvmf_request *req)
1007 {
1008 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1009 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
1010 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
1011 	uint64_t start_lba;
1012 	uint64_t num_blocks;
1013 	int rc;
1014 
1015 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
1016 
1017 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
1018 		SPDK_ERRLOG("end of media\n");
1019 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1020 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
1021 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1022 	}
1023 
1024 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
1025 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
1026 			    num_blocks, block_size, req->length);
1027 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1028 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
1029 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1030 	}
1031 
1032 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;
1033 
1034 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
1035 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
1036 	if (spdk_unlikely(rc != 0)) {
1037 		if (rc == -ENOMEM) {
1038 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
1039 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
1040 		}
1041 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1042 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1043 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1044 	}
1045 
1046 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
1047 }
1048 
1049 static void
1050 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
1051 				   void *cb_arg)
1052 {
1053 	struct spdk_nvmf_request	*req = cb_arg;
1054 
1055 	if (spdk_unlikely(!success)) {
1056 		int                     sc = 0, sct = 0;
1057 		uint32_t                cdw0 = 0;
1058 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
1059 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
1060 
1061 		response->cdw0 = cdw0;
1062 		response->status.sc = sc;
1063 		response->status.sct = sct;
1064 	}
1065 
1066 	spdk_bdev_free_io(bdev_io);
1067 	req->zcopy_bdev_io = NULL;
1068 	spdk_nvmf_request_complete(req);
1069 }
1070 
1071 void
1072 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
1073 {
1074 	int rc __attribute__((unused));
1075 
1076 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
1077 
1078 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
1079 	assert(rc == 0);
1080 }
1081