xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision b4ba6cdf0ad14da86fe0ca1b53f6a3f7b09560de)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 
11 #include "spdk/bdev.h"
12 #include "spdk/endian.h"
13 #include "spdk/thread.h"
14 #include "spdk/likely.h"
15 #include "spdk/nvme.h"
16 #include "spdk/nvmf_cmd.h"
17 #include "spdk/nvmf_spec.h"
18 #include "spdk/trace.h"
19 #include "spdk/scsi_spec.h"
20 #include "spdk/string.h"
21 #include "spdk/util.h"
22 
23 #include "spdk/log.h"
24 
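/*
 * Return true only if every namespace in the subsystem is backed by a bdev
 * that supports the given bdev I/O type. The nvmf_ctrlr_*_supported()
 * wrappers below use this to decide whether the subsystem as a whole can
 * offer the optional DSM, Write Zeroes and Copy commands.
 */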
25 static bool
26 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
27 				      enum spdk_bdev_io_type io_type)
28 {
29 	struct spdk_nvmf_ns *ns;
30 
31 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
32 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
33 		if (ns->bdev == NULL) {
34 			continue;
35 		}
36 
37 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
38 			SPDK_DEBUGLOG(nvmf,
39 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
40 				      spdk_nvmf_subsystem_get_nqn(subsystem),
41 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
42 			return false;
43 		}
44 	}
45 
46 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
47 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
48 	return true;
49 }
50 
51 bool
52 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
53 {
54 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
55 }
56 
57 bool
58 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
59 {
60 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
61 }
62 
63 bool
64 nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
65 {
66 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
67 }
68 
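/*
 * Generic bdev completion callback for I/O commands: translate the bdev's
 * NVMe status into the request's completion entry. For a fused
 * Compare-and-Write pair, the bdev reports status for both halves, so the
 * first (compare) request is completed here before the second request.
 */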
69 static void
70 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
71 			     void *cb_arg)
72 {
73 	struct spdk_nvmf_request	*req = cb_arg;
74 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
75 	int				sc = 0, sct = 0;
76 	uint32_t			cdw0 = 0;
77 
78 	if (spdk_unlikely(req->first_fused)) {
79 		struct spdk_nvmf_request	*first_req = req->first_fused_req;
80 		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
81 		int				first_sc = 0, first_sct = 0;
82 
83 		/* get status for both operations */
84 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
85 		first_response->cdw0 = cdw0;
86 		first_response->status.sc = first_sc;
87 		first_response->status.sct = first_sct;
88 
89 		/* complete the first request of the fused pair */
90 		spdk_nvmf_request_complete(first_req);
91 		req->first_fused_req = NULL;
92 		req->first_fused = false;
93 	} else {
94 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
95 	}
96 
97 	response->cdw0 = cdw0;
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 static void
106 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
107 				   void *cb_arg)
108 {
109 	struct spdk_nvmf_request *req = cb_arg;
110 
111 	if (req->cmd_cb_fn) {
112 		req->cmd_cb_fn(req);
113 	}
114 
115 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
116 }
117 
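/*
 * Fill the Identify Namespace data from the backing bdev's properties. When
 * DIF insert/strip is enabled, metadata is handled entirely inside SPDK, so
 * the namespace is reported without metadata and with the data block size
 * only.
 */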
118 void
119 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
120 			    bool dif_insert_or_strip)
121 {
122 	struct spdk_bdev *bdev = ns->bdev;
123 	uint64_t num_blocks;
124 	uint32_t phys_blocklen;
125 	uint32_t max_copy;
126 
127 	num_blocks = spdk_bdev_get_num_blocks(bdev);
128 
129 	nsdata->nsze = num_blocks;
130 	nsdata->ncap = num_blocks;
131 	nsdata->nuse = num_blocks;
132 	nsdata->nlbaf = 0;
133 	nsdata->flbas.format = 0;
134 	nsdata->flbas.msb_format = 0;
135 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
136 	if (!dif_insert_or_strip) {
137 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
138 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
139 		if (nsdata->lbaf[0].ms != 0) {
140 			nsdata->flbas.extended = 1;
141 			nsdata->mc.extended = 1;
142 			nsdata->mc.pointer = 0;
143 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
144 			/* The NVMf library doesn't process the PRACT and PRCHK flags, so we
145 			 * leave the use of the extended LBA buffer to users.
146 			 */
147 			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
148 		}
149 	} else {
150 		nsdata->lbaf[0].ms = 0;
151 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
152 	}
153 
154 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
155 	assert(phys_blocklen > 0);
156 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
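	/*
	 * NPWG/NAWUPF/NPWA/NPDG/NPDA are 0's based counts of logical blocks.
	 * Example: a 4096-byte physical block with a 512-byte LBA format
	 * (lbads = 9) gives npwg = (4096 >> 9) - 1 = 7, i.e. a preferred
	 * write granularity of 8 logical blocks.
	 */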
157 	nsdata->nsfeat.optperf = 1;
158 	nsdata->nsfeat.ns_atomic_write_unit = 1;
159 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
160 	nsdata->nawupf = nsdata->npwg;
161 	nsdata->npwa = nsdata->npwg;
162 	nsdata->npdg = nsdata->npwg;
163 	nsdata->npda = nsdata->npwg;
164 
165 	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
166 		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
167 	}
168 	nsdata->nmic.can_share = 1;
169 	if (nvmf_ns_is_ptpl_capable(ns)) {
170 		nsdata->nsrescap.rescap.persist = 1;
171 	}
172 	nsdata->nsrescap.rescap.write_exclusive = 1;
173 	nsdata->nsrescap.rescap.exclusive_access = 1;
174 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
175 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
176 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
177 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
178 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
179 
180 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
181 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
182 
183 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
184 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
185 
186 	/* For now we support just one source range for copy command */
187 	nsdata->msrc = 0;
188 
189 	max_copy = spdk_bdev_get_max_copy(bdev);
190 	if (max_copy == 0 || max_copy > UINT16_MAX) {
191 		/* Zero means copy size is unlimited */
192 		nsdata->mcl = UINT16_MAX;
193 		nsdata->mssrl = UINT16_MAX;
194 	} else {
195 		nsdata->mcl = max_copy;
196 		nsdata->mssrl = max_copy;
197 	}
198 }
199 
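/*
 * Extract the LBA range of a read/write-style command. NLB is 0's based,
 * so e.g. cdw12 & 0xFFFF == 7 means 8 logical blocks.
 */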
200 static void
201 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
202 			      uint64_t *num_blocks)
203 {
204 	/* SLBA: CDW10 and CDW11 */
205 	*start_lba = from_le64(&cmd->cdw10);
206 
207 	/* NLB: CDW12 bits 15:00, 0's based */
208 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
209 }
210 
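/*
 * Check that the I/O range lies entirely within the bdev; the second
 * condition also rejects ranges where start + length wraps around
 * UINT64_MAX.
 */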
211 static bool
212 nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
213 			     uint64_t io_num_blocks)
214 {
215 	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
216 	    io_start_lba + io_num_blocks < io_start_lba) {
217 		return false;
218 	}
219 
220 	return true;
221 }
222 
223 static void
224 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
225 {
226 	struct spdk_nvmf_request *req = arg;
227 	int rc;
228 
229 	rc = nvmf_ctrlr_process_io_cmd(req);
230 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
231 		spdk_nvmf_request_complete(req);
232 	}
233 }
234 
235 static void
236 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
237 {
238 	struct spdk_nvmf_request *req = arg;
239 	int rc;
240 
241 	rc = nvmf_ctrlr_process_admin_cmd(req);
242 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
243 		spdk_nvmf_request_complete(req);
244 	}
245 }
246 
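/*
 * Queue a request whose bdev submission failed with -ENOMEM (bdev_io pool
 * exhausted). The bdev layer calls cb_fn once a bdev_io becomes available
 * again; the resubmit callbacks above then re-run full command processing.
 */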
247 static void
248 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
249 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
250 {
251 	int rc;
252 
253 	req->bdev_io_wait.bdev = bdev;
254 	req->bdev_io_wait.cb_fn = cb_fn;
255 	req->bdev_io_wait.cb_arg = cb_arg;
256 
257 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
258 	if (rc != 0) {
259 		assert(false);
260 	}
261 	req->qpair->group->stat.pending_bdev_io++;
262 }
263 
264 bool
265 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
266 {
267 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
268 }
269 
270 int
271 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
272 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
273 {
274 	struct spdk_bdev_ext_io_opts opts = {
275 		.size = SPDK_SIZEOF(&opts, accel_sequence),
276 		.memory_domain = req->memory_domain,
277 		.memory_domain_ctx = req->memory_domain_ctx,
278 		.accel_sequence = req->accel_sequence,
279 	};
280 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
281 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
282 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
283 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
284 	uint64_t start_lba;
285 	uint64_t num_blocks;
286 	int rc;
287 
288 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
289 
290 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
291 		SPDK_ERRLOG("end of media\n");
292 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
293 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
294 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
295 	}
296 
297 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
298 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
299 			    num_blocks, block_size, req->length);
300 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
301 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
302 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
303 	}
304 
305 	assert(!spdk_nvmf_request_using_zcopy(req));
306 
307 	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
308 					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
309 	if (spdk_unlikely(rc)) {
310 		if (rc == -ENOMEM) {
311 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
312 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
313 		}
314 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
315 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
316 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
317 	}
318 
319 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
320 }
321 
322 int
323 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
324 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
325 {
326 	struct spdk_bdev_ext_io_opts opts = {
327 		.size = SPDK_SIZEOF(&opts, accel_sequence),
328 		.memory_domain = req->memory_domain,
329 		.memory_domain_ctx = req->memory_domain_ctx,
330 		.accel_sequence = req->accel_sequence,
331 	};
332 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
333 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
334 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
335 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
336 	uint64_t start_lba;
337 	uint64_t num_blocks;
338 	int rc;
339 
340 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
341 
342 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
343 		SPDK_ERRLOG("end of media\n");
344 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
345 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
346 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
347 	}
348 
349 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
350 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
351 			    num_blocks, block_size, req->length);
352 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
353 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
354 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
355 	}
356 
357 	assert(!spdk_nvmf_request_using_zcopy(req));
358 
359 	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
360 					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
361 	if (spdk_unlikely(rc)) {
362 		if (rc == -ENOMEM) {
363 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
364 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
365 		}
366 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
367 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
368 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
369 	}
370 
371 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
372 }
373 
374 int
375 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
376 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
377 {
378 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
379 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
380 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
381 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
382 	uint64_t start_lba;
383 	uint64_t num_blocks;
384 	int rc;
385 
386 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
387 
388 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
389 		SPDK_ERRLOG("end of media\n");
390 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
391 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
392 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
393 	}
394 
395 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
396 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
397 			    num_blocks, block_size, req->length);
398 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
399 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
400 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
401 	}
402 
403 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
404 				       nvmf_bdev_ctrlr_complete_cmd, req);
405 	if (spdk_unlikely(rc)) {
406 		if (rc == -ENOMEM) {
407 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
408 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
409 		}
410 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
411 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
412 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
413 	}
414 
415 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
416 }
417 
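/*
 * Fused Compare-and-Write: both halves must describe the same LBA range and
 * are submitted as a single bdev compare-and-write I/O. Its completion
 * callback finishes the compare (first fused) request before the write
 * request, which owns the completion path here.
 */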
418 int
419 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
420 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
421 {
422 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
423 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
424 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
425 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
426 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
427 	uint64_t write_start_lba, cmp_start_lba;
428 	uint64_t write_num_blocks, cmp_num_blocks;
429 	int rc;
430 
431 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
432 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
433 
434 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
435 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
436 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
437 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
438 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
439 	}
440 
441 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
442 			  write_num_blocks))) {
443 		SPDK_ERRLOG("end of media\n");
444 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
445 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
446 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
447 	}
448 
449 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
450 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
451 			    write_num_blocks, block_size, write_req->length);
452 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
453 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
454 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
455 	}
456 
457 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
458 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
459 	if (spdk_unlikely(rc)) {
460 		if (rc == -ENOMEM) {
461 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
462 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
463 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
464 		}
465 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
466 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
467 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
468 	}
469 
470 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
471 }
472 
473 int
474 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
475 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
476 {
477 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
478 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
479 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
480 	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
481 	uint64_t start_lba;
482 	uint64_t num_blocks;
483 	int rc;
484 
485 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
486 	if (spdk_unlikely(max_write_zeroes_size > 0 &&
487 			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_get_block_size(bdev))) {
488 		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 " KiB\n", max_write_zeroes_size);
489 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
490 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
491 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
492 	}
493 
494 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
495 		SPDK_ERRLOG("end of media\n");
496 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
497 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
498 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
499 	}
500 
501 	if (spdk_unlikely(cmd->cdw12_bits.write_zeroes.deac)) {
502 		SPDK_ERRLOG("Write Zeroes Deallocate is not supported\n");
503 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
504 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
505 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
506 	}
507 
508 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
509 					   nvmf_bdev_ctrlr_complete_cmd, req);
510 	if (spdk_unlikely(rc)) {
511 		if (rc == -ENOMEM) {
512 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
513 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
514 		}
515 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
516 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
517 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
518 	}
519 
520 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
521 }
522 
523 int
524 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
525 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
526 {
527 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
528 	int rc;
529 
530 	/* For an NVMe-oF controller, SPDK always sets the volatile write
531 	 * cache bit to 1, so return success for block devices that
532 	 * don't support the FLUSH command.
533 	 */
534 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
535 		response->status.sct = SPDK_NVME_SCT_GENERIC;
536 		response->status.sc = SPDK_NVME_SC_SUCCESS;
537 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
538 	}
539 
540 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
541 				    nvmf_bdev_ctrlr_complete_cmd, req);
542 	if (spdk_unlikely(rc)) {
543 		if (rc == -ENOMEM) {
544 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
545 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
546 		}
547 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
548 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
549 	}
550 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
551 }
552 
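/*
 * Dataset Management (deallocate) bookkeeping: every DSM range becomes a
 * separate unmap submitted to the bdev. 'count' tracks outstanding unmaps,
 * 'range_index' records where to resume after an -ENOMEM requeue, and the
 * NVMe request completes only after the last unmap finishes.
 */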
553 struct nvmf_bdev_ctrlr_unmap {
554 	struct spdk_nvmf_request	*req;
555 	uint32_t			count;
556 	struct spdk_bdev_desc		*desc;
557 	struct spdk_bdev		*bdev;
558 	struct spdk_io_channel		*ch;
559 	uint32_t			range_index;
560 };
561 
562 static void
563 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
564 			  void *cb_arg)
565 {
566 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
567 	struct spdk_nvmf_request	*req = unmap_ctx->req;
568 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
569 	int				sc, sct;
570 	uint32_t			cdw0;
571 
572 	unmap_ctx->count--;
573 
574 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
575 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
576 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
577 		response->cdw0 = cdw0;
578 		response->status.sc = sc;
579 		response->status.sct = sct;
580 	}
581 
582 	if (unmap_ctx->count == 0) {
583 		spdk_nvmf_request_complete(req);
584 		free(unmap_ctx);
585 	}
586 	spdk_bdev_free_io(bdev_io);
587 }
588 
589 static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
590 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
591 				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
592 static void
593 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
594 {
595 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
596 	struct spdk_nvmf_request *req = unmap_ctx->req;
597 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
598 	struct spdk_bdev *bdev = unmap_ctx->bdev;
599 	struct spdk_io_channel *ch = unmap_ctx->ch;
600 
601 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
602 }
603 
604 static int
605 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
606 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
607 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
608 {
609 	uint16_t nr, i;
610 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
611 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
612 	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
613 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
614 	struct spdk_iov_xfer ix;
615 	uint64_t lba;
616 	uint32_t lba_count;
617 	int rc;
618 
619 	nr = cmd->cdw10_bits.dsm.nr + 1;
620 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
621 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
622 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
623 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
624 	}
625 
626 	if (unmap_ctx == NULL) {
627 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
628 		if (!unmap_ctx) {
629 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
630 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
631 		}
632 
633 		unmap_ctx->req = req;
634 		unmap_ctx->desc = desc;
635 		unmap_ctx->ch = ch;
636 		unmap_ctx->bdev = bdev;
637 
638 		response->status.sct = SPDK_NVME_SCT_GENERIC;
639 		response->status.sc = SPDK_NVME_SC_SUCCESS;
640 	} else {
641 		unmap_ctx->count--;	/* dequeued */
642 	}
643 
644 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
645 
646 	for (i = unmap_ctx->range_index; i < nr; i++) {
647 		struct spdk_nvme_dsm_range dsm_range = { 0 };
648 
649 		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));
650 
651 		lba = dsm_range.starting_lba;
652 		lba_count = dsm_range.length;
653 		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
654 			SPDK_ERRLOG("invalid unmap size, should not exceed %" PRIu64 " KiB\n", max_discard_size);
655 			response->status.sct = SPDK_NVME_SCT_GENERIC;
656 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
657 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
658 		}
659 
660 		unmap_ctx->count++;
661 
662 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
663 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
664 		if (rc) {
665 			if (rc == -ENOMEM) {
666 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
667 				/* Unmap was not yet submitted to bdev */
668 				/* unmap_ctx->count will be decremented when the request is dequeued */
669 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
670 			}
671 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
672 			unmap_ctx->count--;
673 			/* We can't return here - we may have to wait for any other
674 			 * unmaps already sent to complete */
675 			break;
676 		}
677 		unmap_ctx->range_index++;
678 	}
679 
680 	if (unmap_ctx->count == 0) {
681 		free(unmap_ctx);
682 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
683 	}
684 
685 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
686 }
687 
688 int
689 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
690 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
691 {
692 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
693 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
694 
695 	if (cmd->cdw11_bits.dsm.ad) {
696 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
697 	}
698 
699 	response->status.sct = SPDK_NVME_SCT_GENERIC;
700 	response->status.sc = SPDK_NVME_SC_SUCCESS;
701 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
702 }
703 
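/*
 * Simple Copy command: the destination SDLBA comes from CDW10/11. Only a
 * single source range (NR == 0) with descriptor format 0 is accepted,
 * matching the msrc/mssrl limits reported in Identify Namespace above.
 */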
704 int
705 nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
706 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
707 {
708 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
709 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
710 	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
711 	struct spdk_nvme_scc_source_range range = { 0 };
712 	struct spdk_iov_xfer ix;
713 	int rc;
714 
715 	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %" PRIu64 ", NR %u, desc format %u, PRINFOR %u, "
716 		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
717 		      sdlba,
718 		      cmd->cdw12_bits.copy.nr,
719 		      cmd->cdw12_bits.copy.df,
720 		      cmd->cdw12_bits.copy.prinfor,
721 		      cmd->cdw12_bits.copy.dtype,
722 		      cmd->cdw12_bits.copy.stcw,
723 		      cmd->cdw12_bits.copy.prinfow,
724 		      cmd->cdw12_bits.copy.fua,
725 		      cmd->cdw12_bits.copy.lr);
726 
727 	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
728 			  sizeof(struct spdk_nvme_scc_source_range))) {
729 		response->status.sct = SPDK_NVME_SCT_GENERIC;
730 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
731 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
732 	}
733 
734 	/*
735 	 * We support only one source range; the single iov transfer below
736 	 * relies on that.
737 	 */
738 	if (cmd->cdw12_bits.copy.nr > 0) {
739 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
740 		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
741 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
742 	}
743 
744 	if (cmd->cdw12_bits.copy.df != 0) {
745 		response->status.sct = SPDK_NVME_SCT_GENERIC;
746 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
747 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
748 	}
749 
750 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
751 	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));
752 
753 	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
754 				   nvmf_bdev_ctrlr_complete_cmd, req);
755 	if (spdk_unlikely(rc)) {
756 		if (rc == -ENOMEM) {
757 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
758 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
759 		}
760 
761 		response->status.sct = SPDK_NVME_SCT_GENERIC;
762 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
763 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
764 	}
765 
766 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
767 }
768 
769 int
770 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
771 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
772 {
773 	int rc;
774 
775 	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
776 					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);
777 
778 	if (spdk_unlikely(rc)) {
779 		if (rc == -ENOMEM) {
780 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
781 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
782 		}
783 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
784 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
785 		req->rsp->nvme_cpl.status.dnr = 1;
786 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
787 	}
788 
789 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
790 }
791 
792 int
793 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
794 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
795 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
796 {
797 	int rc;
798 
799 	if (spdk_unlikely(req->iovcnt > 1)) {
800 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
801 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
802 		req->rsp->nvme_cpl.status.dnr = 1;
803 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
804 	}
805 
806 	req->cmd_cb_fn = cb_fn;
807 
808 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
809 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
810 	if (spdk_unlikely(rc)) {
811 		if (rc == -ENOMEM) {
812 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
813 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
814 		}
815 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
816 		if (rc == -ENOTSUP) {
817 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
818 		} else {
819 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
820 		}
821 
822 		req->rsp->nvme_cpl.status.dnr = 1;
823 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
824 	}
825 
826 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
827 }
828 
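/*
 * Abort handling: the caller presets bit 0 of the completion's CDW0 to 1
 * ("command not aborted"); if the bdev abort succeeds, the bit is cleared
 * here to report that the command was aborted.
 */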
829 static void
830 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
831 {
832 	struct spdk_nvmf_request *req = cb_arg;
833 
834 	if (success) {
835 		req->rsp->nvme_cpl.cdw0 &= ~1U;
836 	}
837 
838 	spdk_nvmf_request_complete(req);
839 	spdk_bdev_free_io(bdev_io);
840 }
841 
842 int
843 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
844 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
845 			       struct spdk_nvmf_request *req_to_abort)
846 {
847 	int rc;
848 
849 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
850 
851 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
852 	if (spdk_likely(rc == 0)) {
853 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
854 	} else if (rc == -ENOMEM) {
855 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
856 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
857 	} else {
858 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
859 	}
860 }
861 
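/*
 * Build a DIF context from the bdev's protection information settings for
 * end-to-end data protection. Returns false if the bdev carries no metadata,
 * in which case no DIF processing is required. The initial reference tag is
 * the low 32 bits of the command's starting LBA.
 */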
862 bool
863 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
864 			    struct spdk_dif_ctx *dif_ctx)
865 {
866 	uint32_t init_ref_tag, dif_check_flags = 0;
867 	int rc;
868 	struct spdk_dif_ctx_init_ext_opts dif_opts;
869 
870 	if (spdk_bdev_get_md_size(bdev) == 0) {
871 		return false;
872 	}
873 
874 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
875 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
876 
877 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
878 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
879 	}
880 
881 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
882 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
883 	}
884 
885 	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
886 	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
887 	rc = spdk_dif_ctx_init(dif_ctx,
888 			       spdk_bdev_get_block_size(bdev),
889 			       spdk_bdev_get_md_size(bdev),
890 			       spdk_bdev_is_md_interleaved(bdev),
891 			       spdk_bdev_is_dif_head_of_md(bdev),
892 			       spdk_bdev_get_dif_type(bdev),
893 			       dif_check_flags,
894 			       init_ref_tag, 0, 0, 0, 0, &dif_opts);
895 
896 	return rc == 0;
897 }
898 
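/*
 * Zero-copy flow: zcopy start asks the bdev to provide (and, for reads,
 * populate) the data buffers. The resulting bdev_io is stashed in
 * req->zcopy_bdev_io and kept alive until nvmf_bdev_ctrlr_zcopy_end()
 * commits or aborts it and releases the buffers.
 */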
899 static void
900 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
901 				     void *cb_arg)
902 {
903 	struct spdk_nvmf_request	*req = cb_arg;
904 	struct iovec *iov;
905 	int iovcnt = 0;
906 
907 	if (spdk_unlikely(!success)) {
908 		int                     sc = 0, sct = 0;
909 		uint32_t                cdw0 = 0;
910 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
911 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
912 
913 		response->cdw0 = cdw0;
914 		response->status.sc = sc;
915 		response->status.sct = sct;
916 
917 		spdk_bdev_free_io(bdev_io);
918 		spdk_nvmf_request_complete(req);
919 		return;
920 	}
921 
922 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
923 
924 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
925 	assert(iovcnt > 0);
926 
927 	req->iovcnt = iovcnt;
928 
929 	assert(req->iov == iov);
930 
931 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
932 
933 	spdk_nvmf_request_complete(req);
934 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
935 }
936 
937 int
938 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
939 			    struct spdk_bdev_desc *desc,
940 			    struct spdk_io_channel *ch,
941 			    struct spdk_nvmf_request *req)
942 {
943 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
944 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
945 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
946 	uint64_t start_lba;
947 	uint64_t num_blocks;
948 	int rc;
949 
950 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
951 
952 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
953 		SPDK_ERRLOG("end of media\n");
954 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
955 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
956 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
957 	}
958 
959 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
960 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
961 			    num_blocks, block_size, req->length);
962 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
963 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
964 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
965 	}
966 
967 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;
968 
969 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
970 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
971 	if (spdk_unlikely(rc != 0)) {
972 		if (rc == -ENOMEM) {
973 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
974 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
975 		}
976 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
977 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
978 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
979 	}
980 
981 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
982 }
983 
984 static void
985 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
986 				   void *cb_arg)
987 {
988 	struct spdk_nvmf_request	*req = cb_arg;
989 
990 	if (spdk_unlikely(!success)) {
991 		int                     sc = 0, sct = 0;
992 		uint32_t                cdw0 = 0;
993 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
994 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
995 
996 		response->cdw0 = cdw0;
997 		response->status.sc = sc;
998 		response->status.sct = sct;
999 	}
1000 
1001 	spdk_bdev_free_io(bdev_io);
1002 	req->zcopy_bdev_io = NULL;
1003 	spdk_nvmf_request_complete(req);
1004 }
1005 
1006 void
1007 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
1008 {
1009 	int rc __attribute__((unused));
1010 
1011 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
1012 
1013 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
1014 	assert(rc == 0);
1015 }
1016