/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

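/*
 * Check whether every bdev-backed namespace in the subsystem supports the
 * given bdev I/O type. Namespaces without a bdev are skipped.
 */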
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

bool
nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
}

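/*
 * Generic bdev I/O completion callback: translate the bdev I/O status into an
 * NVMe completion, complete the request (and, for fused compare-and-write,
 * the paired first request as well), then free the bdev_io.
 */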
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* Fused commands - get the status for both operations */
		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* The first command of the fused pair must be completed first */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

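/*
 * Fill out the Identify Namespace data structure from the bdev's properties.
 * When DIF insert/strip is enabled, metadata is hidden from the host and the
 * LBA format reflects only the data block size.
 */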
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;
	uint32_t max_copy;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->flbas.msb_format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
			/* The NVMf library doesn't process the PRACT and PRCHK flags,
			 * so use of the extended LBA buffer is left to users.
			 */
			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* The Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	}
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));

	/* For now we support just one source range for the Copy command */
	nsdata->msrc = 0;

	max_copy = spdk_bdev_get_max_copy(bdev);
	if (max_copy == 0 || max_copy > UINT16_MAX) {
		/* Zero means the copy size is unlimited */
		nsdata->mcl = UINT16_MAX;
		nsdata->mssrl = UINT16_MAX;
	} else {
		nsdata->mcl = max_copy;
		nsdata->mssrl = max_copy;
	}
}

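/* Extract the starting LBA (CDW10-11) and the 1-based block count (CDW12 NLB) from the command. */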
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0-based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_io_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_admin_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

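/*
 * Queue the request to be retried (via cb_fn) once the bdev has resources
 * available again. Used when a bdev submission returns -ENOMEM.
 */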
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

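/*
 * Handlers for NVMe I/O commands follow. Each validates the LBA range and
 * SGL length, submits the corresponding bdev I/O, and either completes the
 * request inline on error or finishes asynchronously in the bdev callback.
 */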
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_bdev_ext_io_opts opts = {
		.size = SPDK_SIZEOF(&opts, accel_sequence),
		.memory_domain = req->memory_domain,
		.memory_domain_ctx = req->memory_domain_ctx,
		.accel_sequence = req->accel_sequence,
	};
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_bdev_ext_io_opts opts = {
		.size = SPDK_SIZEOF(&opts, accel_sequence),
		.memory_domain = req->memory_domain,
		.memory_domain_ctx = req->memory_domain_ctx,
		.accel_sequence = req->accel_sequence,
	};
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

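/*
 * Fused COMPARE + WRITE: both halves must address the same LBA range; the
 * compare buffers and write buffers are submitted as a single bdev
 * compare-and-write operation.
 */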
int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

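/*
 * WRITE ZEROES: bounded by the subsystem's max_write_zeroes_size_kib (when
 * non-zero) and by the namespace capacity.
 */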
int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
	if (spdk_unlikely(max_write_zeroes_size > 0 &&
			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_get_block_size(bdev))) {
		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 "KiB\n", max_write_zeroes_size);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* As an NVMe-oF controller, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices
	 * that don't support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

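/* Tracks an in-flight Dataset Management (deallocate) request that may fan out
 * into multiple bdev unmap operations, one per DSM range.
 */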
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

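/*
 * Submit one bdev unmap per DSM range. On -ENOMEM the request is queued and
 * resumed from nvmf_bdev_ctrlr_unmap_resubmit() at the saved range_index.
 */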
static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_iov_xfer ix;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);

	for (i = unmap_ctx->range_index; i < nr; i++) {
		struct spdk_nvme_dsm_range dsm_range = { 0 };

		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));

		lba = dsm_range.starting_lba;
		lba_count = dsm_range.length;
		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
			SPDK_ERRLOG("invalid unmap size, should not exceed %" PRIu64 "KiB\n", max_discard_size);
			response->status.sct = SPDK_NVME_SCT_GENERIC;
			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* The unmap was not yet submitted to the bdev;
				 * unmap_ctx->count will be decremented when the request is dequeued.
				 */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete.
			 */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

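/*
 * Simple Copy command: only a single source range and descriptor format 0 are
 * supported; the copy is submitted as one bdev copy operation.
 */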
int
nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
	struct spdk_nvme_scc_source_range range = { 0 };
	struct spdk_iov_xfer ix;
	int rc;

	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %lu, NR %u, desc format %u, PRINFOR %u, "
		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
		      sdlba,
		      cmd->cdw12_bits.copy.nr,
		      cmd->cdw12_bits.copy.df,
		      cmd->cdw12_bits.copy.prinfor,
		      cmd->cdw12_bits.copy.dtype,
		      cmd->cdw12_bits.copy.stcw,
		      cmd->cdw12_bits.copy.prinfow,
		      cmd->cdw12_bits.copy.fua,
		      cmd->cdw12_bits.copy.lr);

	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
			  sizeof(struct spdk_nvme_scc_source_range))) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	/*
	 * We support only one source range, and rely on this with the xfer
	 * below.
	 */
	if (cmd->cdw12_bits.copy.nr > 0) {
		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (cmd->cdw12_bits.copy.df != 0) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));

	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
				   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);

	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	if (spdk_unlikely(req->iovcnt > 1)) {
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

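/*
 * Abort handler: cdw0 bit 0 is preset to 1 ("not aborted") by the caller and
 * cleared in the completion callback if the bdev layer successfully aborted
 * the command.
 */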
int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

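/*
 * Build a DIF context for bdevs with metadata, using the command's starting
 * LBA as the initial reference tag. Returns false if the bdev has no metadata
 * or the context cannot be initialized.
 */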
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0, &dif_opts);

	return (rc == 0) ? true : false;
}

static void
nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt = 0;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

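/*
 * Start a zero-copy read or write: validate the range, then ask the bdev for
 * buffers. The bdev_io is kept in req->zcopy_bdev_io until the matching
 * nvmf_bdev_ctrlr_zcopy_end() call.
 */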
int
nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

void
nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
{
	int rc __attribute__((unused));

	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);

	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
	assert(rc == 0);
}