xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 45a053c5777494f4e8ce4bc1191c9de3920377f7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 
11 #include "spdk/bdev.h"
12 #include "spdk/endian.h"
13 #include "spdk/thread.h"
14 #include "spdk/likely.h"
15 #include "spdk/nvme.h"
16 #include "spdk/nvmf_cmd.h"
17 #include "spdk/nvmf_spec.h"
18 #include "spdk/trace.h"
19 #include "spdk/scsi_spec.h"
20 #include "spdk/string.h"
21 #include "spdk/util.h"
22 
23 #include "spdk/log.h"
24 
25 static bool
26 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
27 				      enum spdk_bdev_io_type io_type)
28 {
29 	struct spdk_nvmf_ns *ns;
30 
31 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
32 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
33 		if (ns->bdev == NULL) {
34 			continue;
35 		}
36 
37 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
38 			SPDK_DEBUGLOG(nvmf,
39 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
40 				      spdk_nvmf_subsystem_get_nqn(subsystem),
41 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
42 			return false;
43 		}
44 	}
45 
46 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
47 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
48 	return true;
49 }
50 
51 bool
52 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
53 {
54 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
55 }
56 
57 bool
58 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
59 {
60 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
61 }
62 
63 bool
64 nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
65 {
66 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
67 }
68 
69 static void
70 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
71 			     void *cb_arg)
72 {
73 	struct spdk_nvmf_request	*req = cb_arg;
74 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
75 	int				sc = 0, sct = 0;
76 	uint32_t			cdw0 = 0;
77 
78 	if (spdk_unlikely(req->first_fused)) {
79 		struct spdk_nvmf_request	*first_req = req->first_fused_req;
80 		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
81 		int				first_sc = 0, first_sct = 0;
82 
83 		/* get status for both operations */
84 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
85 		first_response->cdw0 = cdw0;
86 		first_response->status.sc = first_sc;
87 		first_response->status.sct = first_sct;
88 
89 		/* first request should be completed */
90 		spdk_nvmf_request_complete(first_req);
91 		req->first_fused_req = NULL;
92 		req->first_fused = false;
93 	} else {
94 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
95 	}
96 
97 	response->cdw0 = cdw0;
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 static void
106 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
107 				   void *cb_arg)
108 {
109 	struct spdk_nvmf_request *req = cb_arg;
110 
111 	if (req->cmd_cb_fn) {
112 		req->cmd_cb_fn(req);
113 	}
114 
115 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
116 }
117 
118 void
119 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
120 			    bool dif_insert_or_strip)
121 {
122 	struct spdk_bdev *bdev = ns->bdev;
123 	uint64_t num_blocks;
124 	uint32_t phys_blocklen;
125 	uint32_t max_copy;
126 
127 	num_blocks = spdk_bdev_get_num_blocks(bdev);
128 
129 	nsdata->nsze = num_blocks;
130 	nsdata->ncap = num_blocks;
131 	nsdata->nuse = num_blocks;
132 	nsdata->nlbaf = 0;
133 	nsdata->flbas.format = 0;
134 	nsdata->flbas.msb_format = 0;
135 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
136 	if (!dif_insert_or_strip) {
137 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
138 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
139 		if (nsdata->lbaf[0].ms != 0) {
140 			nsdata->flbas.extended = 1;
141 			nsdata->mc.extended = 1;
142 			nsdata->mc.pointer = 0;
143 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
144 			/* NVMf library doesn't process PRACT and PRCHK flags, we
145 			 * leave the use of extended LBA buffer to users.
146 			 */
147 			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
148 		}
149 	} else {
150 		nsdata->lbaf[0].ms = 0;
151 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
152 	}
153 
154 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
155 	assert(phys_blocklen > 0);
156 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
157 	nsdata->nsfeat.optperf = 1;
158 	nsdata->nsfeat.ns_atomic_write_unit = 1;
159 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
160 	nsdata->nawupf = nsdata->npwg;
161 	nsdata->npwa = nsdata->npwg;
162 	nsdata->npdg = nsdata->npwg;
163 	nsdata->npda = nsdata->npwg;
164 
165 	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
166 		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
167 	}
168 	nsdata->nmic.can_share = 1;
169 	if (nvmf_ns_is_ptpl_capable(ns)) {
170 		nsdata->nsrescap.rescap.persist = 1;
171 	}
172 	nsdata->nsrescap.rescap.write_exclusive = 1;
173 	nsdata->nsrescap.rescap.exclusive_access = 1;
174 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
175 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
176 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
177 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
178 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
179 
180 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
181 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
182 
183 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
184 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
185 
186 	/* For now we support just one source range for copy command */
187 	nsdata->msrc = 0;
188 
189 	max_copy = spdk_bdev_get_max_copy(bdev);
190 	if (max_copy == 0 || max_copy > UINT16_MAX) {
191 		/* Zero means copy size is unlimited */
192 		nsdata->mcl = UINT16_MAX;
193 		nsdata->mssrl = UINT16_MAX;
194 	} else {
195 		nsdata->mcl = max_copy;
196 		nsdata->mssrl = max_copy;
197 	}
198 }
199 
200 static void
201 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
202 			      uint64_t *num_blocks)
203 {
204 	/* SLBA: CDW10 and CDW11 */
205 	*start_lba = from_le64(&cmd->cdw10);
206 
207 	/* NLB: CDW12 bits 15:00, 0's based */
208 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
209 }
210 
/*
 * Check that the block range [io_start_lba, io_start_lba + io_num_blocks)
 * lies within a device of bdev_num_blocks blocks.  A range whose end wraps
 * around the 64-bit LBA space is rejected.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	const uint64_t end_lba = io_start_lba + io_num_blocks;

	/* Unsigned wraparound: the sum ended up below its own start. */
	if (end_lba < io_start_lba) {
		return false;
	}

	return end_lba <= bdev_num_blocks;
}
222 
223 static void
224 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
225 {
226 	struct spdk_nvmf_request *req = arg;
227 	int rc;
228 
229 	rc = nvmf_ctrlr_process_io_cmd(req);
230 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
231 		spdk_nvmf_request_complete(req);
232 	}
233 }
234 
235 static void
236 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
237 {
238 	struct spdk_nvmf_request *req = arg;
239 	int rc;
240 
241 	rc = nvmf_ctrlr_process_admin_cmd(req);
242 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
243 		spdk_nvmf_request_complete(req);
244 	}
245 }
246 
247 static void
248 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
249 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
250 {
251 	int rc;
252 
253 	req->bdev_io_wait.bdev = bdev;
254 	req->bdev_io_wait.cb_fn = cb_fn;
255 	req->bdev_io_wait.cb_arg = cb_arg;
256 
257 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
258 	if (rc != 0) {
259 		assert(false);
260 	}
261 	req->qpair->group->stat.pending_bdev_io++;
262 }
263 
264 bool
265 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
266 {
267 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
268 }
269 
270 int
271 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
272 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
273 {
274 	struct spdk_bdev_ext_io_opts opts = {
275 		.size = SPDK_SIZEOF(&opts, accel_sequence),
276 		.memory_domain = req->memory_domain,
277 		.memory_domain_ctx = req->memory_domain_ctx,
278 		.accel_sequence = req->accel_sequence,
279 	};
280 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
281 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
282 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
283 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
284 	uint64_t start_lba;
285 	uint64_t num_blocks;
286 	int rc;
287 
288 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
289 
290 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
291 		SPDK_ERRLOG("end of media\n");
292 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
293 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
294 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
295 	}
296 
297 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
298 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
299 			    num_blocks, block_size, req->length);
300 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
301 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
302 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
303 	}
304 
305 	assert(!spdk_nvmf_request_using_zcopy(req));
306 
307 	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
308 					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
309 	if (spdk_unlikely(rc)) {
310 		if (rc == -ENOMEM) {
311 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
312 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
313 		}
314 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
315 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
316 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
317 	}
318 
319 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
320 }
321 
322 int
323 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
324 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
325 {
326 	struct spdk_bdev_ext_io_opts opts = {
327 		.size = SPDK_SIZEOF(&opts, accel_sequence),
328 		.memory_domain = req->memory_domain,
329 		.memory_domain_ctx = req->memory_domain_ctx,
330 		.accel_sequence = req->accel_sequence,
331 	};
332 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
333 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
334 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
335 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
336 	uint64_t start_lba;
337 	uint64_t num_blocks;
338 	int rc;
339 
340 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
341 
342 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
343 		SPDK_ERRLOG("end of media\n");
344 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
345 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
346 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
347 	}
348 
349 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
350 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
351 			    num_blocks, block_size, req->length);
352 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
353 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
354 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
355 	}
356 
357 	assert(!spdk_nvmf_request_using_zcopy(req));
358 
359 	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
360 					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
361 	if (spdk_unlikely(rc)) {
362 		if (rc == -ENOMEM) {
363 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
364 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
365 		}
366 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
367 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
368 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
369 	}
370 
371 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
372 }
373 
374 int
375 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
376 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
377 {
378 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
379 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
380 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
381 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
382 	uint64_t start_lba;
383 	uint64_t num_blocks;
384 	int rc;
385 
386 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
387 
388 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
389 		SPDK_ERRLOG("end of media\n");
390 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
391 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
392 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
393 	}
394 
395 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
396 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
397 			    num_blocks, block_size, req->length);
398 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
399 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
400 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
401 	}
402 
403 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
404 				       nvmf_bdev_ctrlr_complete_cmd, req);
405 	if (spdk_unlikely(rc)) {
406 		if (rc == -ENOMEM) {
407 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
408 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
409 		}
410 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
411 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
412 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
413 	}
414 
415 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
416 }
417 
418 int
419 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
420 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
421 {
422 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
423 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
424 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
425 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
426 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
427 	uint64_t write_start_lba, cmp_start_lba;
428 	uint64_t write_num_blocks, cmp_num_blocks;
429 	int rc;
430 
431 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
432 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
433 
434 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
435 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
436 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
437 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
438 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
439 	}
440 
441 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
442 			  write_num_blocks))) {
443 		SPDK_ERRLOG("end of media\n");
444 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
445 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
446 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
447 	}
448 
449 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
450 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
451 			    write_num_blocks, block_size, write_req->length);
452 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
453 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
454 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
455 	}
456 
457 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
458 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
459 	if (spdk_unlikely(rc)) {
460 		if (rc == -ENOMEM) {
461 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
462 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
463 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
464 		}
465 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
466 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
467 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
468 	}
469 
470 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
471 }
472 
473 int
474 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
475 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
476 {
477 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
478 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
479 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
480 	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
481 	uint64_t start_lba;
482 	uint64_t num_blocks;
483 	int rc;
484 
485 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
486 	if (spdk_unlikely(max_write_zeroes_size > 0 &&
487 			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_get_block_size(bdev))) {
488 		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 "Kib\n", max_write_zeroes_size);
489 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
490 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
491 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
492 	}
493 
494 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
495 		SPDK_ERRLOG("end of media\n");
496 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
497 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
498 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
499 	}
500 
501 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
502 					   nvmf_bdev_ctrlr_complete_cmd, req);
503 	if (spdk_unlikely(rc)) {
504 		if (rc == -ENOMEM) {
505 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
506 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
507 		}
508 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
509 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
510 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
511 	}
512 
513 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
514 }
515 
516 int
517 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
518 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
519 {
520 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
521 	int rc;
522 
523 	/* As for NVMeoF controller, SPDK always set volatile write
524 	 * cache bit to 1, return success for those block devices
525 	 * which can't support FLUSH command.
526 	 */
527 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
528 		response->status.sct = SPDK_NVME_SCT_GENERIC;
529 		response->status.sc = SPDK_NVME_SC_SUCCESS;
530 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
531 	}
532 
533 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
534 				    nvmf_bdev_ctrlr_complete_cmd, req);
535 	if (spdk_unlikely(rc)) {
536 		if (rc == -ENOMEM) {
537 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
538 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
539 		}
540 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
541 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
542 	}
543 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
544 }
545 
/* Per-request bookkeeping for a Dataset Management (deallocate) command. */
struct nvmf_bdev_ctrlr_unmap {
	/* The NVMe-oF request being serviced. */
	struct spdk_nvmf_request *req;
	/* Unmap operations accounted for but not yet finished (including one
	 * waiting in the io_wait queue, if any).
	 */
	uint32_t count;
	/* bdev handles saved so a deferred submission can be resumed. */
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct spdk_io_channel *ch;
	/* Index of the next DSM range to submit. */
	uint32_t range_index;
};
554 
555 static void
556 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
557 			  void *cb_arg)
558 {
559 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
560 	struct spdk_nvmf_request	*req = unmap_ctx->req;
561 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
562 	int				sc, sct;
563 	uint32_t			cdw0;
564 
565 	unmap_ctx->count--;
566 
567 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
568 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
569 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
570 		response->cdw0 = cdw0;
571 		response->status.sc = sc;
572 		response->status.sct = sct;
573 	}
574 
575 	if (unmap_ctx->count == 0) {
576 		spdk_nvmf_request_complete(req);
577 		free(unmap_ctx);
578 	}
579 	spdk_bdev_free_io(bdev_io);
580 }
581 
582 static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
583 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
584 				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
585 static void
586 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
587 {
588 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
589 	struct spdk_nvmf_request *req = unmap_ctx->req;
590 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
591 	struct spdk_bdev *bdev = unmap_ctx->bdev;
592 	struct spdk_io_channel *ch = unmap_ctx->ch;
593 
594 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
595 }
596 
597 static int
598 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
599 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
600 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
601 {
602 	uint16_t nr, i;
603 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
604 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
605 	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
606 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
607 	struct spdk_iov_xfer ix;
608 	uint64_t lba;
609 	uint32_t lba_count;
610 	int rc;
611 
612 	nr = cmd->cdw10_bits.dsm.nr + 1;
613 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
614 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
615 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
616 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
617 	}
618 
619 	if (unmap_ctx == NULL) {
620 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
621 		if (!unmap_ctx) {
622 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
623 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
624 		}
625 
626 		unmap_ctx->req = req;
627 		unmap_ctx->desc = desc;
628 		unmap_ctx->ch = ch;
629 		unmap_ctx->bdev = bdev;
630 
631 		response->status.sct = SPDK_NVME_SCT_GENERIC;
632 		response->status.sc = SPDK_NVME_SC_SUCCESS;
633 	} else {
634 		unmap_ctx->count--;	/* dequeued */
635 	}
636 
637 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
638 
639 	for (i = unmap_ctx->range_index; i < nr; i++) {
640 		struct spdk_nvme_dsm_range dsm_range = { 0 };
641 
642 		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));
643 
644 		lba = dsm_range.starting_lba;
645 		lba_count = dsm_range.length;
646 		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
647 			SPDK_ERRLOG("invalid unmap size, should not exceed %" PRIu64 "Kib\n", max_discard_size);
648 			response->status.sct = SPDK_NVME_SCT_GENERIC;
649 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
650 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
651 		}
652 
653 		unmap_ctx->count++;
654 
655 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
656 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
657 		if (rc) {
658 			if (rc == -ENOMEM) {
659 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
660 				/* Unmap was not yet submitted to bdev */
661 				/* unmap_ctx->count will be decremented when the request is dequeued */
662 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
663 			}
664 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
665 			unmap_ctx->count--;
666 			/* We can't return here - we may have to wait for any other
667 				* unmaps already sent to complete */
668 			break;
669 		}
670 		unmap_ctx->range_index++;
671 	}
672 
673 	if (unmap_ctx->count == 0) {
674 		free(unmap_ctx);
675 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
676 	}
677 
678 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
679 }
680 
681 int
682 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
683 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
684 {
685 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
686 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
687 
688 	if (cmd->cdw11_bits.dsm.ad) {
689 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
690 	}
691 
692 	response->status.sct = SPDK_NVME_SCT_GENERIC;
693 	response->status.sc = SPDK_NVME_SC_SUCCESS;
694 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
695 }
696 
697 int
698 nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
699 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
700 {
701 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
702 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
703 	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
704 	struct spdk_nvme_scc_source_range range = { 0 };
705 	struct spdk_iov_xfer ix;
706 	int rc;
707 
708 	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %lu, NR %u, desc format %u, PRINFOR %u, "
709 		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
710 		      sdlba,
711 		      cmd->cdw12_bits.copy.nr,
712 		      cmd->cdw12_bits.copy.df,
713 		      cmd->cdw12_bits.copy.prinfor,
714 		      cmd->cdw12_bits.copy.dtype,
715 		      cmd->cdw12_bits.copy.stcw,
716 		      cmd->cdw12_bits.copy.prinfow,
717 		      cmd->cdw12_bits.copy.fua,
718 		      cmd->cdw12_bits.copy.lr);
719 
720 	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
721 			  sizeof(struct spdk_nvme_scc_source_range))) {
722 		response->status.sct = SPDK_NVME_SCT_GENERIC;
723 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
724 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
725 	}
726 
727 	/*
728 	 * We support only one source range, and rely on this with the xfer
729 	 * below.
730 	 */
731 	if (cmd->cdw12_bits.copy.nr > 0) {
732 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
733 		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
734 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
735 	}
736 
737 	if (cmd->cdw12_bits.copy.df != 0) {
738 		response->status.sct = SPDK_NVME_SCT_GENERIC;
739 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
740 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
741 	}
742 
743 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
744 	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));
745 
746 	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
747 				   nvmf_bdev_ctrlr_complete_cmd, req);
748 	if (spdk_unlikely(rc)) {
749 		if (rc == -ENOMEM) {
750 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
751 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
752 		}
753 
754 		response->status.sct = SPDK_NVME_SCT_GENERIC;
755 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
756 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
757 	}
758 
759 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
760 }
761 
762 int
763 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
764 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
765 {
766 	int rc;
767 
768 	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
769 					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);
770 
771 	if (spdk_unlikely(rc)) {
772 		if (rc == -ENOMEM) {
773 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
774 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
775 		}
776 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
777 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
778 		req->rsp->nvme_cpl.status.dnr = 1;
779 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
780 	}
781 
782 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
783 }
784 
785 int
786 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
787 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
788 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
789 {
790 	int rc;
791 
792 	if (spdk_unlikely(req->iovcnt > 1)) {
793 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
794 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
795 		req->rsp->nvme_cpl.status.dnr = 1;
796 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
797 	}
798 
799 	req->cmd_cb_fn = cb_fn;
800 
801 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
802 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
803 	if (spdk_unlikely(rc)) {
804 		if (rc == -ENOMEM) {
805 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
806 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
807 		}
808 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
809 		if (rc == -ENOTSUP) {
810 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
811 		} else {
812 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
813 		}
814 
815 		req->rsp->nvme_cpl.status.dnr = 1;
816 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
817 	}
818 
819 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
820 }
821 
822 static void
823 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
824 {
825 	struct spdk_nvmf_request *req = cb_arg;
826 
827 	if (success) {
828 		req->rsp->nvme_cpl.cdw0 &= ~1U;
829 	}
830 
831 	spdk_nvmf_request_complete(req);
832 	spdk_bdev_free_io(bdev_io);
833 }
834 
835 int
836 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
837 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
838 			       struct spdk_nvmf_request *req_to_abort)
839 {
840 	int rc;
841 
842 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
843 
844 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
845 	if (spdk_likely(rc == 0)) {
846 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
847 	} else if (rc == -ENOMEM) {
848 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
849 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
850 	} else {
851 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
852 	}
853 }
854 
855 bool
856 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
857 			    struct spdk_dif_ctx *dif_ctx)
858 {
859 	uint32_t init_ref_tag, dif_check_flags = 0;
860 	int rc;
861 	struct spdk_dif_ctx_init_ext_opts dif_opts;
862 
863 	if (spdk_bdev_get_md_size(bdev) == 0) {
864 		return false;
865 	}
866 
867 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
868 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
869 
870 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
871 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
872 	}
873 
874 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
875 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
876 	}
877 
878 	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
879 	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
880 	rc = spdk_dif_ctx_init(dif_ctx,
881 			       spdk_bdev_get_block_size(bdev),
882 			       spdk_bdev_get_md_size(bdev),
883 			       spdk_bdev_is_md_interleaved(bdev),
884 			       spdk_bdev_is_dif_head_of_md(bdev),
885 			       spdk_bdev_get_dif_type(bdev),
886 			       dif_check_flags,
887 			       init_ref_tag, 0, 0, 0, 0, &dif_opts);
888 
889 	return (rc == 0) ? true : false;
890 }
891 
892 static void
893 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
894 				     void *cb_arg)
895 {
896 	struct spdk_nvmf_request	*req = cb_arg;
897 	struct iovec *iov;
898 	int iovcnt = 0;
899 
900 	if (spdk_unlikely(!success)) {
901 		int                     sc = 0, sct = 0;
902 		uint32_t                cdw0 = 0;
903 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
904 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
905 
906 		response->cdw0 = cdw0;
907 		response->status.sc = sc;
908 		response->status.sct = sct;
909 
910 		spdk_bdev_free_io(bdev_io);
911 		spdk_nvmf_request_complete(req);
912 		return;
913 	}
914 
915 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
916 
917 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
918 	assert(iovcnt > 0);
919 
920 	req->iovcnt = iovcnt;
921 
922 	assert(req->iov == iov);
923 
924 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
925 
926 	spdk_nvmf_request_complete(req);
927 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
928 }
929 
930 int
931 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
932 			    struct spdk_bdev_desc *desc,
933 			    struct spdk_io_channel *ch,
934 			    struct spdk_nvmf_request *req)
935 {
936 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
937 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
938 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
939 	uint64_t start_lba;
940 	uint64_t num_blocks;
941 	int rc;
942 
943 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
944 
945 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
946 		SPDK_ERRLOG("end of media\n");
947 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
948 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
949 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
950 	}
951 
952 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
953 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
954 			    num_blocks, block_size, req->length);
955 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
956 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
957 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
958 	}
959 
960 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;
961 
962 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
963 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
964 	if (spdk_unlikely(rc != 0)) {
965 		if (rc == -ENOMEM) {
966 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
967 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
968 		}
969 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
970 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
971 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
972 	}
973 
974 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
975 }
976 
977 static void
978 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
979 				   void *cb_arg)
980 {
981 	struct spdk_nvmf_request	*req = cb_arg;
982 
983 	if (spdk_unlikely(!success)) {
984 		int                     sc = 0, sct = 0;
985 		uint32_t                cdw0 = 0;
986 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
987 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
988 
989 		response->cdw0 = cdw0;
990 		response->status.sc = sc;
991 		response->status.sct = sct;
992 	}
993 
994 	spdk_bdev_free_io(bdev_io);
995 	req->zcopy_bdev_io = NULL;
996 	spdk_nvmf_request_complete(req);
997 }
998 
999 void
1000 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
1001 {
1002 	int rc __attribute__((unused));
1003 
1004 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
1005 
1006 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
1007 	assert(rc == 0);
1008 }
1009