1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 
11 #include "spdk/bdev.h"
12 #include "spdk/endian.h"
13 #include "spdk/thread.h"
14 #include "spdk/likely.h"
15 #include "spdk/nvme.h"
16 #include "spdk/nvmf_cmd.h"
17 #include "spdk/nvmf_spec.h"
18 #include "spdk/trace.h"
19 #include "spdk/scsi_spec.h"
20 #include "spdk/string.h"
21 #include "spdk/util.h"
22 
23 #include "spdk/log.h"
24 
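/* Return true only if every namespace bdev in this subsystem supports the given
 * bdev I/O type (namespaces without a bdev are skipped).
 */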
25 static bool
26 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
27 				      enum spdk_bdev_io_type io_type)
28 {
29 	struct spdk_nvmf_ns *ns;
30 
31 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
32 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
33 		if (ns->bdev == NULL) {
34 			continue;
35 		}
36 
37 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
38 			SPDK_DEBUGLOG(nvmf,
39 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
40 				      spdk_nvmf_subsystem_get_nqn(subsystem),
41 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
42 			return false;
43 		}
44 	}
45 
46 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
47 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
48 	return true;
49 }
50 
51 bool
52 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
53 {
54 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
55 }
56 
57 bool
58 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
59 {
60 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
61 }
62 
63 bool
64 nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr)
65 {
66 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_COPY);
67 }
68 
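/* Generic bdev I/O completion callback: translate the bdev's NVMe status into the
 * NVMe-oF completion. For a fused compare-and-write, the first (compare) request is
 * completed here as well.
 */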
69 static void
70 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
71 			     void *cb_arg)
72 {
73 	struct spdk_nvmf_request	*req = cb_arg;
74 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
75 	int				sc = 0, sct = 0;
76 	uint32_t			cdw0 = 0;
77 
78 	if (spdk_unlikely(req->first_fused)) {
79 		struct spdk_nvmf_request	*first_req = req->first_fused_req;
80 		struct spdk_nvme_cpl		*first_response = &first_req->rsp->nvme_cpl;
81 		int				first_sc = 0, first_sct = 0;
82 
83 		/* get status for both operations */
84 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
85 		first_response->cdw0 = cdw0;
86 		first_response->status.sc = first_sc;
87 		first_response->status.sct = first_sct;
88 
89 		/* The first request of the fused pair is completed here */
90 		spdk_nvmf_request_complete(first_req);
91 		req->first_fused_req = NULL;
92 		req->first_fused = false;
93 	} else {
94 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
95 	}
96 
97 	response->cdw0 = cdw0;
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 static void
106 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
107 				   void *cb_arg)
108 {
109 	struct spdk_nvmf_request *req = cb_arg;
110 
111 	if (req->cmd_cb_fn) {
112 		req->cmd_cb_fn(req);
113 	}
114 
115 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
116 }
117 
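/* Fill the Identify Namespace data structure from the backing bdev's properties.
 * When dif_insert_or_strip is enabled, metadata is not exposed to the host.
 */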
118 void
119 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
120 			    bool dif_insert_or_strip)
121 {
122 	struct spdk_bdev *bdev = ns->bdev;
123 	struct spdk_bdev_desc *desc = ns->desc;
124 	uint64_t num_blocks;
125 	uint32_t phys_blocklen;
126 	uint32_t max_copy;
127 
128 	num_blocks = spdk_bdev_get_num_blocks(bdev);
129 
130 	nsdata->nsze = num_blocks;
131 	nsdata->ncap = num_blocks;
132 	nsdata->nuse = num_blocks;
133 	nsdata->nlbaf = 0;
134 	nsdata->flbas.format = 0;
135 	nsdata->flbas.msb_format = 0;
136 	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
137 	if (!dif_insert_or_strip) {
138 		nsdata->lbaf[0].ms = spdk_bdev_desc_get_md_size(desc);
139 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_desc_get_block_size(desc));
140 		if (nsdata->lbaf[0].ms != 0) {
141 			nsdata->flbas.extended = 1;
142 			nsdata->mc.extended = 1;
143 			nsdata->mc.pointer = 0;
144 			nsdata->dps.md_start = spdk_bdev_desc_is_dif_head_of_md(desc);
145 			/* The NVMf library doesn't process the PRACT and PRCHK flags, so
146 			 * the use of the extended LBA buffer is left to users.
147 			 */
148 			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
149 		}
150 	} else {
151 		nsdata->lbaf[0].ms = 0;
152 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
153 	}
154 
155 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
156 	assert(phys_blocklen > 0);
157 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
158 	nsdata->nsfeat.optperf = 1;
159 	nsdata->nsfeat.ns_atomic_write_unit = 1;
160 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
161 	nsdata->nawupf = nsdata->npwg;
162 	nsdata->npwa = nsdata->npwg;
163 	nsdata->npdg = nsdata->npwg;
164 	nsdata->npda = nsdata->npwg;
165 
166 	if (spdk_bdev_get_write_unit_size(bdev) == 1) {
167 		nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
168 	}
169 	nsdata->nmic.can_share = 1;
170 	if (nvmf_ns_is_ptpl_capable(ns)) {
171 		nsdata->nsrescap.rescap.persist = 1;
172 	}
173 	nsdata->nsrescap.rescap.write_exclusive = 1;
174 	nsdata->nsrescap.rescap.exclusive_access = 1;
175 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
176 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
177 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
178 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
179 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
180 
181 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
182 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
183 
184 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
185 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
186 
187 	/* For now we support just one source range for copy command */
188 	nsdata->msrc = 0;
189 
190 	max_copy = spdk_bdev_get_max_copy(bdev);
191 	if (max_copy == 0 || max_copy > UINT16_MAX) {
192 		/* Zero means copy size is unlimited */
193 		nsdata->mcl = UINT16_MAX;
194 		nsdata->mssrl = UINT16_MAX;
195 	} else {
196 		nsdata->mcl = max_copy;
197 		nsdata->mssrl = max_copy;
198 	}
199 }
200 
201 void
202 nvmf_bdev_ctrlr_identify_iocs_nvm(struct spdk_nvmf_ns *ns,
203 				  struct spdk_nvme_nvm_ns_data *nsdata_nvm)
204 {
205 	struct spdk_bdev_desc *desc = ns->desc;
206 	uint8_t _16bpists;
207 	uint32_t sts, pif;
208 
209 	if (spdk_bdev_desc_get_dif_type(desc) == SPDK_DIF_DISABLE) {
210 		return;
211 	}
212 
213 	pif = spdk_bdev_desc_get_dif_pi_format(desc);
214 
215 	/*
216 	 * 16BPISTS shall be 1 for 32/64b Guard PI.
217 	 * STCRS shall be 1 if 16BPISTS is 1.
218 	 * 16 is the minimum value of STS for 32b Guard PI.
219 	 */
220 	switch (pif) {
221 	case SPDK_DIF_PI_FORMAT_16:
222 		_16bpists = 0;
223 		sts = 0;
224 		break;
225 	case SPDK_DIF_PI_FORMAT_32:
226 		_16bpists = 1;
227 		sts = 16;
228 		break;
229 	case SPDK_DIF_PI_FORMAT_64:
230 		_16bpists = 1;
231 		sts = 0;
232 		break;
233 	default:
234 		SPDK_WARNLOG("PI format %u is not supported\n", pif);
235 		return;
236 	}
237 
238 	/* For 16b Guard PI, Storage Tag is not available because we set STS to 0.
239 	 * In this case, we do not have to set 16BPISTM to 1. For simplicity,
240 	 * set 16BPISTM to 0 and set LBSTM to all zeroes.
241 	 *
242 	 * We will revisit this when we find an OS that uses the Storage Tag.
243 	 */
244 	nsdata_nvm->lbstm = 0;
245 	nsdata_nvm->pic._16bpistm = 0;
246 
247 	nsdata_nvm->pic._16bpists = _16bpists;
248 	nsdata_nvm->pic.stcrs = 0;
249 	nsdata_nvm->elbaf[0].sts = sts;
250 	nsdata_nvm->elbaf[0].pif = pif;
251 }
252 
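/* Extract the start LBA (CDW10/11) and the block count (CDW12 NLB, which is 0's based)
 * from a read/write-style command.
 */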
253 static void
254 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
255 			      uint64_t *num_blocks)
256 {
257 	/* SLBA: CDW10 and CDW11 */
258 	*start_lba = from_le64(&cmd->cdw10);
259 
260 	/* NLB: CDW12 bits 15:00, 0's based */
261 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
262 }
263 
264 static void
265 nvmf_bdev_ctrlr_get_rw_ext_params(const struct spdk_nvme_cmd *cmd,
266 				  struct spdk_bdev_ext_io_opts *opts)
267 {
268 	/* Get CDW12 values */
269 	opts->nvme_cdw12.raw = from_le32(&cmd->cdw12);
270 
271 	/* Get CDW13 values */
272 	opts->nvme_cdw13.raw = from_le32(&cmd->cdw13);
273 
274 	/* The bdev layer checks PRACT in CDW12 because it is NVMe specific, but
275 	 * it does not check the DIF check flags in CDW12 because DIF is not NVMe
276 	 * specific. Hence, copy the DIF check flags from CDW12 into dif_check_flags_exclude_mask.
277 	 */
278 	opts->dif_check_flags_exclude_mask = (~opts->nvme_cdw12.raw) & SPDK_NVME_IO_FLAGS_PRCHK_MASK;
279 }
280 
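/* Check that the I/O range fits within the bdev and does not wrap the 64-bit LBA space. */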
281 static bool
282 nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
283 			     uint64_t io_num_blocks)
284 {
285 	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
286 	    io_start_lba + io_num_blocks < io_start_lba) {
287 		return false;
288 	}
289 
290 	return true;
291 }
292 
293 static void
294 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
295 {
296 	struct spdk_nvmf_request *req = arg;
297 	int rc;
298 
299 	rc = nvmf_ctrlr_process_io_cmd(req);
300 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
301 		spdk_nvmf_request_complete(req);
302 	}
303 }
304 
305 static void
306 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
307 {
308 	struct spdk_nvmf_request *req = arg;
309 	int rc;
310 
311 	rc = nvmf_ctrlr_process_admin_cmd(req);
312 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
313 		spdk_nvmf_request_complete(req);
314 	}
315 }
316 
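/* Park the request on the bdev's io_wait queue (the -ENOMEM path); cb_fn resubmits
 * the command once bdev resources become available again.
 */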
317 static void
318 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
319 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
320 {
321 	int rc;
322 
323 	req->bdev_io_wait.bdev = bdev;
324 	req->bdev_io_wait.cb_fn = cb_fn;
325 	req->bdev_io_wait.cb_arg = cb_arg;
326 
327 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
328 	if (rc != 0) {
329 		assert(false);
330 	}
331 	req->qpair->group->stat.pending_bdev_io++;
332 }
333 
334 bool
335 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
336 {
337 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
338 }
339 
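/* NVMe Read: validate the LBA range and transfer length, then issue an extended
 * readv to the bdev.
 */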
340 int
341 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
342 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
343 {
344 	struct spdk_bdev_ext_io_opts opts = {
345 		.size = SPDK_SIZEOF(&opts, accel_sequence),
346 		.memory_domain = req->memory_domain,
347 		.memory_domain_ctx = req->memory_domain_ctx,
348 		.accel_sequence = req->accel_sequence,
349 	};
350 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
351 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
352 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
353 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
354 	uint64_t start_lba;
355 	uint64_t num_blocks;
356 	int rc;
357 
358 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
359 	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);
360 
361 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
362 		SPDK_ERRLOG("end of media\n");
363 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
364 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
365 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
366 	}
367 
368 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
369 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
370 			    num_blocks, block_size, req->length);
371 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
372 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
373 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
374 	}
375 
376 	assert(!spdk_nvmf_request_using_zcopy(req));
377 
378 	rc = spdk_bdev_readv_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
379 					nvmf_bdev_ctrlr_complete_cmd, req, &opts);
380 	if (spdk_unlikely(rc)) {
381 		if (rc == -ENOMEM) {
382 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
383 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
384 		}
385 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
386 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
387 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
388 	}
389 
390 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
391 }
392 
393 int
394 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
395 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
396 {
397 	struct spdk_bdev_ext_io_opts opts = {
398 		.size = SPDK_SIZEOF(&opts, nvme_cdw13),
399 		.memory_domain = req->memory_domain,
400 		.memory_domain_ctx = req->memory_domain_ctx,
401 		.accel_sequence = req->accel_sequence,
402 	};
403 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
404 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
405 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
406 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
407 	uint64_t start_lba;
408 	uint64_t num_blocks;
409 	int rc;
410 
411 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
412 	nvmf_bdev_ctrlr_get_rw_ext_params(cmd, &opts);
413 
414 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
415 		SPDK_ERRLOG("end of media\n");
416 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
417 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
418 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
419 	}
420 
421 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
422 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
423 			    num_blocks, block_size, req->length);
424 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
425 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
426 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
427 	}
428 
429 	assert(!spdk_nvmf_request_using_zcopy(req));
430 
431 	rc = spdk_bdev_writev_blocks_ext(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
432 					 nvmf_bdev_ctrlr_complete_cmd, req, &opts);
433 	if (spdk_unlikely(rc)) {
434 		if (rc == -ENOMEM) {
435 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
436 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
437 		}
438 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
439 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
440 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
441 	}
442 
443 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
444 }
445 
446 int
447 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
448 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
449 {
450 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
451 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
452 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
453 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
454 	uint64_t start_lba;
455 	uint64_t num_blocks;
456 	int rc;
457 
458 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
459 
460 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
461 		SPDK_ERRLOG("end of media\n");
462 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
463 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
464 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
465 	}
466 
467 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
468 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
469 			    num_blocks, block_size, req->length);
470 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
471 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
472 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
473 	}
474 
475 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
476 				       nvmf_bdev_ctrlr_complete_cmd, req);
477 	if (spdk_unlikely(rc)) {
478 		if (rc == -ENOMEM) {
479 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
480 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
481 		}
482 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
483 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
484 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
485 	}
486 
487 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
488 }
489 
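/* Fused COMPARE + WRITE: both halves must describe the same LBA range and are
 * submitted to the bdev as a single compare-and-write operation.
 */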
490 int
491 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
492 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
493 {
494 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
495 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
496 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
497 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
498 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
499 	uint64_t write_start_lba, cmp_start_lba;
500 	uint64_t write_num_blocks, cmp_num_blocks;
501 	int rc;
502 
503 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
504 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
505 
506 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
507 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
508 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
509 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
510 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
511 	}
512 
513 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
514 			  write_num_blocks))) {
515 		SPDK_ERRLOG("end of media\n");
516 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
517 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
518 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
519 	}
520 
521 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
522 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
523 			    write_num_blocks, block_size, write_req->length);
524 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
525 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
526 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
527 	}
528 
529 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
530 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
531 	if (spdk_unlikely(rc)) {
532 		if (rc == -ENOMEM) {
533 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
534 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
535 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
536 		}
537 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
538 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
539 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
540 	}
541 
542 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
543 }
544 
545 int
546 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
547 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
548 {
549 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
550 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
551 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
552 	uint64_t max_write_zeroes_size = req->qpair->ctrlr->subsys->max_write_zeroes_size_kib;
553 	uint64_t start_lba;
554 	uint64_t num_blocks;
555 	int rc;
556 
557 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
558 	if (spdk_unlikely(max_write_zeroes_size > 0 &&
559 			  num_blocks > (max_write_zeroes_size << 10) / spdk_bdev_desc_get_block_size(desc))) {
560 		SPDK_ERRLOG("invalid write zeroes size, should not exceed %" PRIu64 " KiB\n", max_write_zeroes_size);
561 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
562 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
563 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
564 	}
565 
566 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
567 		SPDK_ERRLOG("end of media\n");
568 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
569 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
570 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
571 	}
572 
573 	if (spdk_unlikely(cmd->cdw12_bits.write_zeroes.deac)) {
574 		SPDK_ERRLOG("Write Zeroes Deallocate is not supported\n");
575 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
576 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
577 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
578 	}
579 
580 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
581 					   nvmf_bdev_ctrlr_complete_cmd, req);
582 	if (spdk_unlikely(rc)) {
583 		if (rc == -ENOMEM) {
584 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
585 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
586 		}
587 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
588 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
589 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
590 	}
591 
592 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
593 }
594 
595 int
596 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
597 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
598 {
599 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
600 	int rc;
601 
602 	/* As an NVMe-oF controller, SPDK always sets the volatile write
603 	 * cache bit to 1, so return success for block devices
604 	 * that don't support the FLUSH command.
605 	 */
606 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
607 		response->status.sct = SPDK_NVME_SCT_GENERIC;
608 		response->status.sc = SPDK_NVME_SC_SUCCESS;
609 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
610 	}
611 
612 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
613 				    nvmf_bdev_ctrlr_complete_cmd, req);
614 	if (spdk_unlikely(rc)) {
615 		if (rc == -ENOMEM) {
616 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
617 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
618 		}
619 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
620 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
621 	}
622 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
623 }
624 
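/* Tracks a Dataset Management (deallocate) command that may fan out into multiple
 * bdev unmap operations; count is the number of unmaps still outstanding.
 */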
625 struct nvmf_bdev_ctrlr_unmap {
626 	struct spdk_nvmf_request	*req;
627 	uint32_t			count;
628 	struct spdk_bdev_desc		*desc;
629 	struct spdk_bdev		*bdev;
630 	struct spdk_io_channel		*ch;
631 	uint32_t			range_index;
632 };
633 
634 static void
635 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
636 			  void *cb_arg)
637 {
638 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
639 	struct spdk_nvmf_request	*req = unmap_ctx->req;
640 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
641 	int				sc, sct;
642 	uint32_t			cdw0;
643 
644 	unmap_ctx->count--;
645 
646 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
647 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
648 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
649 		response->cdw0 = cdw0;
650 		response->status.sc = sc;
651 		response->status.sct = sct;
652 	}
653 
654 	if (unmap_ctx->count == 0) {
655 		spdk_nvmf_request_complete(req);
656 		free(unmap_ctx);
657 	}
658 	spdk_bdev_free_io(bdev_io);
659 }
660 
661 static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
662 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
663 				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
664 static void
665 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
666 {
667 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
668 	struct spdk_nvmf_request *req = unmap_ctx->req;
669 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
670 	struct spdk_bdev *bdev = unmap_ctx->bdev;
671 	struct spdk_io_channel *ch = unmap_ctx->ch;
672 
673 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
674 }
675 
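/* Issue one bdev unmap per DSM range. range_index records how far the loop got so
 * that a resubmit after -ENOMEM can resume where it left off.
 */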
676 static int
677 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
678 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
679 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
680 {
681 	uint16_t nr, i;
682 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
683 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
684 	uint64_t max_discard_size = req->qpair->ctrlr->subsys->max_discard_size_kib;
685 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
686 	struct spdk_iov_xfer ix;
687 	uint64_t lba;
688 	uint32_t lba_count;
689 	int rc;
690 
691 	nr = cmd->cdw10_bits.dsm.nr + 1;
692 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
693 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
694 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
695 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
696 	}
697 
698 	if (unmap_ctx == NULL) {
699 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
700 		if (!unmap_ctx) {
701 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
702 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
703 		}
704 
705 		unmap_ctx->req = req;
706 		unmap_ctx->desc = desc;
707 		unmap_ctx->ch = ch;
708 		unmap_ctx->bdev = bdev;
709 
710 		response->status.sct = SPDK_NVME_SCT_GENERIC;
711 		response->status.sc = SPDK_NVME_SC_SUCCESS;
712 	} else {
713 		unmap_ctx->count--;	/* resubmitted after being dequeued from the io_wait queue */
714 	}
715 
716 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
717 
718 	for (i = unmap_ctx->range_index; i < nr; i++) {
719 		struct spdk_nvme_dsm_range dsm_range = { 0 };
720 
721 		spdk_iov_xfer_to_buf(&ix, &dsm_range, sizeof(dsm_range));
722 
723 		lba = dsm_range.starting_lba;
724 		lba_count = dsm_range.length;
725 		if (max_discard_size > 0 && lba_count > (max_discard_size << 10) / block_size) {
726 			SPDK_ERRLOG("invalid unmap size %" PRIu32 " blocks, should not exceed %" PRIu64 " blocks\n",
727 				    lba_count, (max_discard_size << 10) / block_size);
728 			response->status.sct = SPDK_NVME_SCT_GENERIC;
729 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
730 			break;
731 		}
732 
733 		unmap_ctx->count++;
734 
735 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
736 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
737 		if (rc) {
738 			if (rc == -ENOMEM) {
739 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
740 				/* Unmap was not yet submitted to bdev */
741 				/* unmap_ctx->count will be decremented when the request is dequeued */
742 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
743 			}
744 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
745 			unmap_ctx->count--;
746 			/* We can't return here - we may have to wait for any other
747 			 * unmaps already sent to complete */
748 			break;
749 		}
750 		unmap_ctx->range_index++;
751 	}
752 
753 	if (unmap_ctx->count == 0) {
754 		free(unmap_ctx);
755 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
756 	}
757 
758 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
759 }
760 
761 int
762 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
763 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
764 {
765 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
766 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
767 
768 	if (cmd->cdw11_bits.dsm.ad) {
769 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
770 	}
771 
772 	response->status.sct = SPDK_NVME_SCT_GENERIC;
773 	response->status.sc = SPDK_NVME_SC_SUCCESS;
774 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
775 }
776 
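/* Simple Copy: only a single source range and descriptor format 0 are supported;
 * the operation is delegated to spdk_bdev_copy_blocks().
 */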
777 int
778 nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
779 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
780 {
781 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
782 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
783 	uint64_t sdlba = ((uint64_t)cmd->cdw11 << 32) + cmd->cdw10;
784 	struct spdk_nvme_scc_source_range range = { 0 };
785 	struct spdk_iov_xfer ix;
786 	int rc;
787 
788 	SPDK_DEBUGLOG(nvmf, "Copy command: SDLBA %lu, NR %u, desc format %u, PRINFOR %u, "
789 		      "DTYPE %u, STCW %u, PRINFOW %u, FUA %u, LR %u\n",
790 		      sdlba,
791 		      cmd->cdw12_bits.copy.nr,
792 		      cmd->cdw12_bits.copy.df,
793 		      cmd->cdw12_bits.copy.prinfor,
794 		      cmd->cdw12_bits.copy.dtype,
795 		      cmd->cdw12_bits.copy.stcw,
796 		      cmd->cdw12_bits.copy.prinfow,
797 		      cmd->cdw12_bits.copy.fua,
798 		      cmd->cdw12_bits.copy.lr);
799 
800 	if (spdk_unlikely(req->length != (cmd->cdw12_bits.copy.nr + 1) *
801 			  sizeof(struct spdk_nvme_scc_source_range))) {
802 		response->status.sct = SPDK_NVME_SCT_GENERIC;
803 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
804 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
805 	}
806 
807 	/*
808 	 * We support only one source range, and the iov transfer below
809 	 * relies on this.
810 	 */
811 	if (cmd->cdw12_bits.copy.nr > 0) {
812 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
813 		response->status.sc = SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED;
814 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
815 	}
816 
817 	if (cmd->cdw12_bits.copy.df != 0) {
818 		response->status.sct = SPDK_NVME_SCT_GENERIC;
819 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
820 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
821 	}
822 
823 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
824 	spdk_iov_xfer_to_buf(&ix, &range, sizeof(range));
825 
826 	rc = spdk_bdev_copy_blocks(desc, ch, sdlba, range.slba, range.nlb + 1,
827 				   nvmf_bdev_ctrlr_complete_cmd, req);
828 	if (spdk_unlikely(rc)) {
829 		if (rc == -ENOMEM) {
830 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
831 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
832 		}
833 
834 		response->status.sct = SPDK_NVME_SCT_GENERIC;
835 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
836 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
837 	}
838 
839 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
840 }
841 
842 int
843 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
844 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
845 {
846 	int rc;
847 
848 	rc = spdk_bdev_nvme_iov_passthru_md(desc, ch, &req->cmd->nvme_cmd, req->iov, req->iovcnt,
849 					    req->length, NULL, 0, nvmf_bdev_ctrlr_complete_cmd, req);
850 
851 	if (spdk_unlikely(rc)) {
852 		if (rc == -ENOMEM) {
853 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
854 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
855 		}
856 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
857 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
858 		req->rsp->nvme_cpl.status.dnr = 1;
859 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
860 	}
861 
862 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
863 }
864 
865 int
866 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
867 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
868 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
869 {
870 	int rc;
871 
872 	if (spdk_unlikely(req->iovcnt > 1)) {
873 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
874 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
875 		req->rsp->nvme_cpl.status.dnr = 1;
876 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
877 	}
878 
879 	req->cmd_cb_fn = cb_fn;
880 
881 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->iov[0].iov_base, req->length,
882 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
883 	if (spdk_unlikely(rc)) {
884 		if (rc == -ENOMEM) {
885 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
886 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
887 		}
888 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
889 		if (rc == -ENOTSUP) {
890 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
891 		} else {
892 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
893 		}
894 
895 		req->rsp->nvme_cpl.status.dnr = 1;
896 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
897 	}
898 
899 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
900 }
901 
902 static void
903 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
904 {
905 	struct spdk_nvmf_request *req = cb_arg;
906 
907 	if (success) {
908 		req->rsp->nvme_cpl.cdw0 &= ~1U;
909 	}
910 
911 	spdk_nvmf_request_complete(req);
912 	spdk_bdev_free_io(bdev_io);
913 }
914 
915 int
916 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
917 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
918 			       struct spdk_nvmf_request *req_to_abort)
919 {
920 	int rc;
921 
922 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
923 
924 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
925 	if (spdk_likely(rc == 0)) {
926 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
927 	} else if (rc == -ENOMEM) {
928 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
929 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
930 	} else {
931 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
932 	}
933 }
934 
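/* Build a DIF context from the bdev's metadata/PI configuration and the command's
 * start LBA. Returns false if the bdev has no metadata or the context cannot be
 * initialized.
 */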
935 bool
936 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev_desc *desc, struct spdk_nvme_cmd *cmd,
937 			    struct spdk_dif_ctx *dif_ctx)
938 {
939 	uint32_t init_ref_tag, dif_check_flags = 0;
940 	int rc;
941 	struct spdk_dif_ctx_init_ext_opts dif_opts;
942 
943 	if (spdk_bdev_desc_get_md_size(desc) == 0) {
944 		return false;
945 	}
946 
947 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
948 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
949 
950 	if (spdk_bdev_desc_is_dif_check_enabled(desc, SPDK_DIF_CHECK_TYPE_REFTAG)) {
951 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
952 	}
953 
954 	if (spdk_bdev_desc_is_dif_check_enabled(desc, SPDK_DIF_CHECK_TYPE_GUARD)) {
955 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
956 	}
957 
958 	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
959 	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
960 	rc = spdk_dif_ctx_init(dif_ctx,
961 			       spdk_bdev_desc_get_block_size(desc),
962 			       spdk_bdev_desc_get_md_size(desc),
963 			       spdk_bdev_desc_is_md_interleaved(desc),
964 			       spdk_bdev_desc_is_dif_head_of_md(desc),
965 			       spdk_bdev_desc_get_dif_type(desc),
966 			       dif_check_flags,
967 			       init_ref_tag, 0, 0, 0, 0, &dif_opts);
968 
969 	return rc == 0;
970 }
971 
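/* Zcopy start completion: on success, adopt the bdev-provided iovecs and keep the
 * bdev_io alive until the zcopy end; on failure, propagate the NVMe status and
 * complete the request.
 */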
972 static void
973 nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
974 				     void *cb_arg)
975 {
976 	struct spdk_nvmf_request	*req = cb_arg;
977 	struct iovec *iov;
978 	int iovcnt = 0;
979 
980 	if (spdk_unlikely(!success)) {
981 		int                     sc = 0, sct = 0;
982 		uint32_t                cdw0 = 0;
983 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
984 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
985 
986 		response->cdw0 = cdw0;
987 		response->status.sc = sc;
988 		response->status.sct = sct;
989 
990 		spdk_bdev_free_io(bdev_io);
991 		spdk_nvmf_request_complete(req);
992 		return;
993 	}
994 
995 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
996 
997 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
998 	assert(iovcnt > 0);
999 
1000 	req->iovcnt = iovcnt;
1001 
1002 	assert(req->iov == iov);
1003 
1004 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
1005 
1006 	spdk_nvmf_request_complete(req);
1007 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
1008 }
1009 
1010 int
1011 nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
1012 			    struct spdk_bdev_desc *desc,
1013 			    struct spdk_io_channel *ch,
1014 			    struct spdk_nvmf_request *req)
1015 {
1016 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1017 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
1018 	uint32_t block_size = spdk_bdev_desc_get_block_size(desc);
1019 	uint64_t start_lba;
1020 	uint64_t num_blocks;
1021 	int rc;
1022 
1023 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
1024 
1025 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
1026 		SPDK_ERRLOG("end of media\n");
1027 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1028 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
1029 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1030 	}
1031 
1032 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
1033 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
1034 			    num_blocks, block_size, req->length);
1035 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1036 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
1037 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1038 	}
1039 
1040 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ);
1041 
1042 	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
1043 				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
1044 	if (spdk_unlikely(rc != 0)) {
1045 		if (rc == -ENOMEM) {
1046 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
1047 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
1048 		}
1049 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1050 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1051 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1052 	}
1053 
1054 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
1055 }
1056 
1057 static void
1058 nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
1059 				   void *cb_arg)
1060 {
1061 	struct spdk_nvmf_request	*req = cb_arg;
1062 
1063 	if (spdk_unlikely(!success)) {
1064 		int                     sc = 0, sct = 0;
1065 		uint32_t                cdw0 = 0;
1066 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
1067 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
1068 
1069 		response->cdw0 = cdw0;
1070 		response->status.sc = sc;
1071 		response->status.sct = sct;
1072 	}
1073 
1074 	spdk_bdev_free_io(bdev_io);
1075 	req->zcopy_bdev_io = NULL;
1076 	spdk_nvmf_request_complete(req);
1077 }
1078 
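/* Commit or abort a previously started zcopy request and complete it. */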
1079 void
1080 nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
1081 {
1082 	int rc __attribute__((unused));
1083 
1084 	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);
1085 
1086 	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
1087 	assert(rc == 0);
1088 }
1089