1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 
11 #include "spdk/bdev.h"
12 #include "spdk/endian.h"
13 #include "spdk/thread.h"
14 #include "spdk/likely.h"
15 #include "spdk/nvme.h"
16 #include "spdk/nvmf_cmd.h"
17 #include "spdk/nvmf_spec.h"
18 #include "spdk/trace.h"
19 #include "spdk/scsi_spec.h"
20 #include "spdk/string.h"
21 #include "spdk/util.h"
22 
23 #include "spdk/log.h"
24 
25 static bool
26 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
27 				      enum spdk_bdev_io_type io_type)
28 {
29 	struct spdk_nvmf_ns *ns;
30 
31 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
32 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
33 		if (ns->bdev == NULL) {
34 			continue;
35 		}
36 
37 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
38 			SPDK_DEBUGLOG(nvmf,
39 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
40 				      spdk_nvmf_subsystem_get_nqn(subsystem),
41 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
42 			return false;
43 		}
44 	}
45 
46 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
47 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
48 	return true;
49 }
50 
51 bool
52 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
53 {
54 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
55 }
56 
57 bool
58 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
59 {
60 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
61 }
62 
63 bool
64 nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
65 {
66 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
67 }
68 
69 static void
70 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
71 			     void *cb_arg)
72 {
73 	struct spdk_nvmf_request	*req = cb_arg;
74 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
75 	int				sc = 0, sct = 0;
76 	uint32_t			cdw0 = 0;
77 
78 	if (spdk_unlikely(req->first_fused)) {
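		/*
		 * This request is the second command of a fused pair (the write half
		 * of a fused Compare and Write); its partner, pointed to by
		 * first_fused_req, is completed first below.
		 */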
79 		struct spdk_nvmf_request	*first_req = req->first_fused_req;
80 		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
81 		int				first_sc = 0, first_sct = 0;
82 
83 		/* get status for both operations */
84 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
85 		first_response->cdw0 = cdw0;
86 		first_response->status.sc = first_sc;
87 		first_response->status.sct = first_sct;
88 
89 		/* The first command of the fused pair is completed here. */
90 		spdk_nvmf_request_complete(first_req);
91 		req->first_fused_req = NULL;
92 		req->first_fused = false;
93 	} else {
94 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
95 	}
96 
97 	response->cdw0 = cdw0;
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 static void
106 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
107 				   void *cb_arg)
108 {
109 	struct spdk_nvmf_request *req = cb_arg;
110 
111 	if (req->cmd_cb_fn) {
112 		req->cmd_cb_fn(req);
113 	}
114 
115 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
116 }
117 
118 void
119 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
120 			    bool dif_insert_or_strip)
121 {
122 	struct spdk_bdev *bdev = ns->bdev;
123 	uint64_t num_blocks;
124 	uint32_t phys_blocklen;
125 	uint32_t max_copy;
126 
127 	num_blocks = spdk_bdev_get_num_blocks(bdev);
128 
129 	nsdata->nsze = num_blocks;
130 	nsdata->ncap = num_blocks;
131 	nsdata->nuse = num_blocks;
132 	nsdata->nlbaf = 0;
133 	nsdata->flbas.format = 0;
134 	nsdata->flbas.msb_format = 0;
135 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
136 	if (!dif_insert_or_strip) {
137 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
138 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
139 		if (nsdata->lbaf[0].ms != 0) {
140 			nsdata->flbas.extended = 1;
141 			nsdata->mc.extended = 1;
142 			nsdata->mc.pointer = 0;
143 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
144 			/* The NVMf library does not process the PRACT and PRCHK flags,
145 			 * so the use of extended LBA buffers is left to the host.
146 			 */
147 			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
148 		}
149 	} else {
150 		nsdata->lbaf[0].ms = 0;
151 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
152 	}
153 
154 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
155 	assert(phys_blocklen > 0);
156 	/* The Linux NVMe driver sets physical_block_size to min(nawupf, npwg) */
157 	nsdata->nsfeat.optperf = 1;
158 	nsdata->nsfeat.ns_atomic_write_unit = 1;
159 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
160 	nsdata->nawupf = nsdata->npwg;
161 	nsdata->npwa = nsdata->npwg;
162 	nsdata->npdg = nsdata->npwg;
163 	nsdata->npda = nsdata->npwg;
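	/*
	 * Example: with a 4096-byte physical block and 512-byte LBAs (lbads = 9),
	 * npwg = (4096 >> 9) - 1 = 7, i.e. a preferred write granularity of
	 * 8 logical blocks, since the field is 0's based.
	 */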
164 
165 	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
166 		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
167 	}
168 	nsdata->nmic.can_share = 1;
169 	if (nvmf_ns_is_ptpl_capable(ns)) {
170 		nsdata->nsrescap.rescap.persist = 1;
171 	}
172 	nsdata->nsrescap.rescap.write_exclusive = 1;
173 	nsdata->nsrescap.rescap.exclusive_access = 1;
174 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
175 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
176 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
177 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
178 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
179 
180 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
181 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
182 
183 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
184 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
185 
186 	/* For now, only one source range is supported for the Copy command. */
187 	nsdata->msrc = 0;
188 
189 	max_copy = spdk_bdev_get_max_copy(bdev);
190 	if (max_copy == 0 || max_copy > UINT16_MAX) {
191 		/* Zero means copy size is unlimited */
192 		nsdata->mcl = UINT16_MAX;
193 		nsdata->mssrl = UINT16_MAX;
194 	} else {
195 		nsdata->mcl = max_copy;
196 		nsdata->mssrl = max_copy;
197 	}
198 }
199 
200 static void
201 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
202 			      uint64_t *num_blocks)
203 {
204 	/* SLBA: CDW10 and CDW11 */
205 	*start_lba = from_le64(&cmd->cdw10);
206 
207 	/* NLB: CDW12 bits 15:00, 0's based */
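	/* For example, an NLB field value of 7 requests 8 logical blocks. */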
208 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
209 }
210 
211 static void
212 nvmf_bdev_ctrlr_get_rw_ext_params(const struct spdk_nvme_cmd *cmd,
213 				  struct spdk_bdev_ext_io_opts *opts)
214 {
215 	/* Get CDW12 values */
216 	opts->nvme_cdw12.raw = from_le32(&cmd->cdw12);
217 
218 	/* Get CDW13 values */
219 	opts->nvme_cdw13.raw = from_le32(&cmd->cdw13);
220 }
221 
222 static bool
223 nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
224 			     uint64_t io_num_blocks)
225 {
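	/*
	 * The second comparison catches 64-bit wraparound: e.g. io_start_lba =
	 * UINT64_MAX - 1 with io_num_blocks = 4 wraps past zero and must be
	 * rejected.
	 */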
226 	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
227 	    io_start_lba + io_num_blocks < io_start_lba) {
228 		return false;
229 	}
230 
231 	return true;
232 }
233 
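/*
 * Resubmission callbacks used with spdk_bdev_queue_io_wait(): when a bdev
 * submission returns -ENOMEM, the request is queued and re-driven from one of
 * these callbacks once a bdev_io becomes available again.
 */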
234 static void
235 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
236 {
237 	struct spdk_nvmf_request *req = arg;
238 	int rc;
239 
240 	rc = nvmf_ctrlr_process_io_cmd(req);
241 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
242 		spdk_nvmf_request_complete(req);
243 	}
244 }
245 
246 static void
247 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
248 {
249 	struct spdk_nvmf_request *req = arg;
250 	int rc;
251 
252 	rc = nvmf_ctrlr_process_admin_cmd(req);
253 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
254 		spdk_nvmf_request_complete(req);
255 	}
256 }
257 
258 static void
259 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
260 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
261 {
262 	int rc;
263 
264 	req->bdev_io_wait.bdev = bdev;
265 	req->bdev_io_wait.cb_fn = cb_fn;
266 	req->bdev_io_wait.cb_arg = cb_arg;
267 
268 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
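	/*
	 * spdk_bdev_queue_io_wait() is not expected to fail here; a nonzero
	 * return indicates invalid arguments, i.e. a programming error, hence
	 * the assert below.
	 */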
269 	if (rc != 0) {
270 		assert(false);
271 	}
272 	req->qpair->group->stat.pending_bdev_io++;
273 }
274 
275 bool
276 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
277 {
278 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
279 }
280 
281 int
282 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
283 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
284 {
285 	struct spdk_bdev_ext_io_opts opts = {
286 		.size = SPDK_SIZEOF(&opts, accel_sequence),
287 		.memory_domain = req->memory_domain,
288 		.memory_domain_ctx = req->memory_domain_ctx,
289 		.accel_sequence = req->accel_sequence,
290 	};
291 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
292 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
293 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
294 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
295 	uint64_t start_lba;
296 	uint64_t num_blocks;
297 	int rc;
298 
299 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
300 
301 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
302 		SPDK_ERRLOG("end of media\n");
303 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
304 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
305 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
306 	}
307 
308 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
309 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
310 			    num_blocks, block_size, req->length);
311 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
312 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
313 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
314 	}
315 
316 	assert(!spdk_nvmf_request_using_zcopy(req));
317 
318 	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
319 					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
320 	if (spdk_unlikely(rc)) {
321 		if (rc == -ENOMEM) {
322 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
323 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
324 		}
325 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
326 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
327 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
328 	}
329 
330 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
331 }
332 
333 int
334 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
335 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
336 {
337 	struct spdk_bdev_ext_io_opts opts = {
338 		.size = SPDK_SIZEOF(&opts, nvme_cdw13),
339 		.memory_domain = req->memory_domain,
340 		.memory_domain_ctx = req->memory_domain_ctx,
341 		.accel_sequence = req->accel_sequence,
342 	};
343 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
344 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
345 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
346 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
347 	uint64_t start_lba;
348 	uint64_t num_blocks;
349 	int rc;
350 
351 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
352 	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);
353 
354 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
355 		SPDK_ERRLOG("end of media\n");
356 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
357 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
358 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
359 	}
360 
361 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
362 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
363 			    num_blocks, block_size, req->length);
364 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
365 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
366 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
367 	}
368 
369 	assert(!spdk_nvmf_request_using_zcopy(req));
370 
371 	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
372 					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
373 	if (spdk_unlikely(rc)) {
374 		if (rc == -ENOMEM) {
375 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
376 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
377 		}
378 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
379 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
380 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
381 	}
382 
383 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
384 }
385 
386 int
387 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
388 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
389 {
390 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
391 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
392 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
393 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
394 	uint64_t start_lba;
395 	uint64_t num_blocks;
396 	int rc;
397 
398 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
399 
400 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
401 		SPDK_ERRLOG("end of media\n");
402 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
403 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
404 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
405 	}
406 
407 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
408 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
409 			    num_blocks, block_size, req->length);
410 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
411 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
412 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
413 	}
414 
415 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
416 				       nvmf_bdev_ctrlr_complete_cmd, req);
417 	if (spdk_unlikely(rc)) {
418 		if (rc == -ENOMEM) {
419 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
420 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
421 		}
422 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
423 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
424 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
425 	}
426 
427 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
428 }
429 
430 int
431 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
432 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
433 {
434 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
435 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
436 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
437 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
438 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
439 	uint64_t write_start_lba, cmp_start_lba;
440 	uint64_t write_num_blocks, cmp_num_blocks;
441 	int rc;
442 
443 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
444 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
445 
446 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
447 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
448 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
449 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
450 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
451 	}
452 
453 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
454 			  write_num_blocks))) {
455 		SPDK_ERRLOG("end of media\n");
456 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
457 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
458 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
459 	}
460 
461 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
462 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
463 			    write_num_blocks, block_size, write_req->length);
464 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
465 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
466 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
467 	}
468 
469 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
470 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
471 	if (spdk_unlikely(rc)) {
472 		if (rc == -ENOMEM) {
473 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
474 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
475 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
476 		}
477 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
478 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
479 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
480 	}
481 
482 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
483 }
484 
485 int
486 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
487 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
488 {
489 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
490 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
491 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
492 	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
493 	uint64_t start_lba;
494 	uint64_t num_blocks;
495 	int rc;
496 
497 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
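	/*
	 * Enforce the configured limit; e.g. with max_write_zeroes_size_kib = 64
	 * and 512-byte blocks, anything above (64 << 10) / 512 = 128 blocks is
	 * rejected with Invalid Field.
	 */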
498 	if (spdk_unlikely(max_write_zeroes_size > 0 &&
499 			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_get_block_size(bdev))) {
500 		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 " KiB\n", max_write_zeroes_size);
501 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
502 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
503 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
504 	}
505 
506 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
507 		SPDK_ERRLOG("end of media\n");
508 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
509 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
510 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
511 	}
512 
513 	if (spdk_unlikely(cmd->cdw12_bits.write_zeroes.deac)) {
514 		SPDK_ERRLOG("Write Zeroes Deallocate is not supported\n");
515 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
516 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
517 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
518 	}
519 
520 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
521 					   nvmf_bdev_ctrlr_complete_cmd, req);
522 	if (spdk_unlikely(rc)) {
523 		if (rc == -ENOMEM) {
524 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
525 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
526 		}
527 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
528 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
529 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
530 	}
531 
532 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
533 }
534 
535 int
536 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
537 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
538 {
539 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
540 	int rc;
541 
542 	/* The SPDK NVMe-oF controller always reports the volatile write
543 	 * cache as enabled, so return success for block devices that
544 	 * do not support the FLUSH command.
545 	 */
546 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
547 		response->status.sct = SPDK_NVME_SCT_GENERIC;
548 		response->status.sc = SPDK_NVME_SC_SUCCESS;
549 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
550 	}
551 
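	/* NVMe FLUSH has no LBA range, so the entire bdev (block 0 through the last block) is flushed. */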
552 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
553 				    nvmf_bdev_ctrlr_complete_cmd, req);
554 	if (spdk_unlikely(rc)) {
555 		if (rc == -ENOMEM) {
556 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
557 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
558 		}
559 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
560 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
561 	}
562 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
563 }
564 
565 struct nvmf_bdev_ctrlr_unmap {
566 	struct spdk_nvmf_request	*req;
567 	uint32_t			count;
568 	struct spdk_bdev_desc		*desc;
569 	struct spdk_bdev		*bdev;
570 	struct spdk_io_channel		*ch;
571 	uint32_t			range_index;
572 };
573 
574 static void
575 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
576 			  void *cb_arg)
577 {
578 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
579 	struct spdk_nvmf_request	*req = unmap_ctx->req;
580 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
581 	int				sc, sct;
582 	uint32_t			cdw0;
583 
584 	unmap_ctx->count--;
585 
586 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
587 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
588 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
589 		response->cdw0 = cdw0;
590 		response->status.sc = sc;
591 		response->status.sct = sct;
592 	}
593 
594 	if (unmap_ctx->count == 0) {
595 		spdk_nvmf_request_complete(req);
596 		free(unmap_ctx);
597 	}
598 	spdk_bdev_free_io(bdev_io);
599 }
600 
601 static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
602 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
603 				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
604 static void
605 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
606 {
607 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
608 	struct spdk_nvmf_request *req = unmap_ctx->req;
609 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
610 	struct spdk_bdev *bdev = unmap_ctx->bdev;
611 	struct spdk_io_channel *ch = unmap_ctx->ch;
612 
613 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
614 }
615 
616 static int
617 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
618 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
619 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
620 {
621 	uint16_t nr, i;
622 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
623 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
624 	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
625 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
626 	struct spdk_iov_xfer ix;
627 	uint64_t lba;
628 	uint32_t lba_count;
629 	int rc;
630 
631 	nr = cmd->cdw10_bits.dsm.nr + 1;
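	/*
	 * NR is 0's based: e.g. a CDW10 NR value of 255 means 256 ranges, which
	 * requires 256 * 16 = 4096 bytes of range data in the payload.
	 */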
632 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
633 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
634 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
635 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
636 	}
637 
638 	if (unmap_ctx == NULL) {
639 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
640 		if (!unmap_ctx) {
641 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
642 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
643 		}
644 
645 		unmap_ctx->req = req;
646 		unmap_ctx->desc = desc;
647 		unmap_ctx->ch = ch;
648 		unmap_ctx->bdev = bdev;
649 
650 		response->status.sct = SPDK_NVME_SCT_GENERIC;
651 		response->status.sc = SPDK_NVME_SC_SUCCESS;
652 	} else {
653 		unmap_ctx->count--;	/* dequeued */
654 	}
655 
656 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
657 
658 	for (i = unmap_ctx->range_index; i < nr; i++) {
659 		struct spdk_nvme_dsm_range dsm_range = { 0 };
660 
661 		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));
662 
663 		lba = dsm_range.starting_lba;
664 		lba_count = dsm_range.length;
665 		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
666 			SPDK_ERRLOG("invalid unmap size, should not exceed %" PRIu64 " KiB\n", max_discard_size);
667 			response->status.sct = SPDK_NVME_SCT_GENERIC;
668 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
669 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
670 		}
671 
672 		unmap_ctx->count++;
673 
674 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
675 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
676 		if (rc) {
677 			if (rc == -ENOMEM) {
678 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
679 				/* Unmap was not yet submitted to bdev */
680 				/* unmap_ctx->count will be decremented when the request is dequeued */
681 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
682 			}
683 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
684 			unmap_ctx->count--;
685 			/* We can't return here - we may have to wait for any other
686 			 * unmaps already sent to complete. */
687 			break;
688 		}
689 		unmap_ctx->range_index++;
690 	}
691 
692 	if (unmap_ctx->count == 0) {
693 		free(unmap_ctx);
694 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
695 	}
696 
697 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
698 }
699 
700 int
701 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
702 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
703 {
704 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
705 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
706 
707 	if (cmd->cdw11_bits.dsm.ad) {
708 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
709 	}
710 
711 	response->status.sct = SPDK_NVME_SCT_GENERIC;
712 	response->status.sc = SPDK_NVME_SC_SUCCESS;
713 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
714 }
715 
716 int
717 nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
718 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
719 {
720 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
721 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
722 	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
723 	struct spdk_nvme_scc_source_range range = { 0 };
724 	struct spdk_iov_xfer ix;
725 	int rc;
726 
727 	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %" PRIu64 ", NR %u, desc format %u, PRINFOR %u, "
728 		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
729 		      sdlba,
730 		      cmd->cdw12_bits.copy.nr,
731 		      cmd->cdw12_bits.copy.df,
732 		      cmd->cdw12_bits.copy.prinfor,
733 		      cmd->cdw12_bits.copy.dtype,
734 		      cmd->cdw12_bits.copy.stcw,
735 		      cmd->cdw12_bits.copy.prinfow,
736 		      cmd->cdw12_bits.copy.fua,
737 		      cmd->cdw12_bits.copy.lr);
738 
739 	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
740 			  sizeof(struct spdk_nvme_scc_source_range))) {
741 		response->status.sct = SPDK_NVME_SCT_GENERIC;
742 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
743 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
744 	}
745 
746 	/*
747 	 * Only one source range is supported; the single iov transfer below
748 	 * relies on this.
749 	 */
750 	if (cmd->cdw12_bits.copy.nr > 0) {
751 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
752 		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
753 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
754 	}
755 
756 	if (cmd->cdw12_bits.copy.df != 0) {
757 		response->status.sct = SPDK_NVME_SCT_GENERIC;
758 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
759 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
760 	}
761 
762 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
763 	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));
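	/* range.nlb is 0's based, so nlb + 1 blocks are copied from range.slba to sdlba. */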
764 
765 	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
766 				   nvmf_bdev_ctrlr_complete_cmd, req);
767 	if (spdk_unlikely(rc)) {
768 		if (rc == -ENOMEM) {
769 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
770 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
771 		}
772 
773 		response->status.sct = SPDK_NVME_SCT_GENERIC;
774 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
775 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
776 	}
777 
778 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
779 }
780 
781 int
782 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
783 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
784 {
785 	int rc;
786 
787 	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
788 					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);
789 
790 	if (spdk_unlikely(rc)) {
791 		if (rc == -ENOMEM) {
792 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
793 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
794 		}
795 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
796 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
797 		req->rsp->nvme_cpl.status.dnr = 1;
798 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
799 	}
800 
801 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
802 }
803 
804 int
805 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
806 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
807 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
808 {
809 	int rc;
810 
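	/*
	 * spdk_bdev_nvme_admin_passthru() takes a single contiguous buffer, so
	 * requests split across multiple iovecs cannot be passed through.
	 */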
811 	if (spdk_unlikely(req->iovcnt > 1)) {
812 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
813 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
814 		req->rsp->nvme_cpl.status.dnr = 1;
815 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
816 	}
817 
818 	req->cmd_cb_fn = cb_fn;
819 
820 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
821 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
822 	if (spdk_unlikely(rc)) {
823 		if (rc == -ENOMEM) {
824 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
825 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
826 		}
827 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
828 		if (rc == -ENOTSUP) {
829 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
830 		} else {
831 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
832 		}
833 
834 		req->rsp->nvme_cpl.status.dnr = 1;
835 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
836 	}
837 
838 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
839 }
840 
841 static void
842 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
843 {
844 	struct spdk_nvmf_request *req = cb_arg;
845 
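	/*
	 * For Abort, a cleared CDW0 bit 0 indicates the command was successfully
	 * aborted; the bit is set by default and cleared only on success.
	 */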
846 	if (success) {
847 		req->rsp->nvme_cpl.cdw0 &= ~1U;
848 	}
849 
850 	spdk_nvmf_request_complete(req);
851 	spdk_bdev_free_io(bdev_io);
852 }
853 
854 int
855 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
856 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
857 			       struct spdk_nvmf_request *req_to_abort)
858 {
859 	int rc;
860 
861 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
862 
863 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
864 	if (spdk_likely(rc == 0)) {
865 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
866 	} else if (rc == -ENOMEM) {
867 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
868 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
869 	} else {
870 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
871 	}
872 }
873 
874 bool
875 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
876 			    struct spdk_dif_ctx *dif_ctx)
877 {
878 	uint32_t init_ref_tag, dif_check_flags = 0;
879 	int rc;
880 	struct spdk_dif_ctx_init_ext_opts dif_opts;
881 
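	/* A bdev without metadata has no room for protection information, so no DIF context is needed. */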
882 	if (spdk_bdev_get_md_size(bdev) == 0) {
883 		return false;
884 	}
885 
886 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
887 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
888 
889 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
890 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
891 	}
892 
893 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
894 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
895 	}
896 
897 	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
898 	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
899 	rc = spdk_dif_ctx_init(dif_ctx,
900 			       spdk_bdev_get_block_size(bdev),
901 			       spdk_bdev_get_md_size(bdev),
902 			       spdk_bdev_is_md_interleaved(bdev),
903 			       spdk_bdev_is_dif_head_of_md(bdev),
904 			       spdk_bdev_get_dif_type(bdev),
905 			       dif_check_flags,
906 			       init_ref_tag, 0, 0, 0, 0, &dif_opts);
907 
908 	return (rc == 0) ? true : false;
909 }
910 
911 static void
912 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
913 				     void *cb_arg)
914 {
915 	struct spdk_nvmf_request	*req = cb_arg;
916 	struct iovec *iov;
917 	int iovcnt = 0;
918 
919 	if (spdk_unlikely(!success)) {
920 		int                     sc = 0, sct = 0;
921 		uint32_t                cdw0 = 0;
922 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
923 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
924 
925 		response->cdw0 = cdw0;
926 		response->status.sc = sc;
927 		response->status.sct = sct;
928 
929 		spdk_bdev_free_io(bdev_io);
930 		spdk_nvmf_request_complete(req);
931 		return;
932 	}
933 
934 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
935 
936 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
937 	assert(iovcnt > 0);
938 
939 	req->iovcnt = iovcnt;
940 
941 	assert(req->iov == iov);
942 
943 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
944 
945 	spdk_nvmf_request_complete(req);
946 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
947 }
948 
949 int
950 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
951 			    struct spdk_bdev_desc *desc,
952 			    struct spdk_io_channel *ch,
953 			    struct spdk_nvmf_request *req)
954 {
955 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
956 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
957 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
958 	uint64_t start_lba;
959 	uint64_t num_blocks;
960 	int rc;
961 
962 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
963 
964 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
965 		SPDK_ERRLOG("end of media\n");
966 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
967 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
968 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
969 	}
970 
971 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
972 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
973 			    num_blocks, block_size, req->length);
974 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
975 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
976 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
977 	}
978 
979 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ);
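	/*
	 * With populate = true (READ), the bdev fills the zcopy buffers with data
	 * before the request is completed to the transport; with populate = false
	 * (WRITE), the buffers are only set up here and the data is committed
	 * later in nvmf_bdev_ctrlr_zcopy_end().
	 */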
980 
981 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
982 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
983 	if (spdk_unlikely(rc != 0)) {
984 		if (rc == -ENOMEM) {
985 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
986 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
987 		}
988 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
989 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
990 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
991 	}
992 
993 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
994 }
995 
996 static void
997 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
998 				   void *cb_arg)
999 {
1000 	struct spdk_nvmf_request	*req = cb_arg;
1001 
1002 	if (spdk_unlikely(!success)) {
1003 		int                     sc = 0, sct = 0;
1004 		uint32_t                cdw0 = 0;
1005 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
1006 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
1007 
1008 		response->cdw0 = cdw0;
1009 		response->status.sc = sc;
1010 		response->status.sct = sct;
1011 	}
1012 
1013 	spdk_bdev_free_io(bdev_io);
1014 	req->zcopy_bdev_io = NULL;
1015 	spdk_nvmf_request_complete(req);
1016 }
1017 
1018 void
1019 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
1020 {
1021 	int rc __attribute__((unused));
1022 
1023 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
1024 
1025 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
1026 	assert(rc == 0);
1027 }
1028