xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision cc6920a4763d4b9a43aa40583c8397d8f14fa100)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvmf_internal.h"
37 
38 #include "spdk/bdev.h"
39 #include "spdk/endian.h"
40 #include "spdk/thread.h"
41 #include "spdk/likely.h"
42 #include "spdk/nvme.h"
43 #include "spdk/nvmf_cmd.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/trace.h"
46 #include "spdk/scsi_spec.h"
47 #include "spdk/string.h"
48 #include "spdk/util.h"
49 
50 #include "spdk/log.h"
51 
52 static bool
53 nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
54 				      enum spdk_bdev_io_type io_type)
55 {
56 	struct spdk_nvmf_ns *ns;
57 
58 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
59 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
60 		if (ns->bdev == NULL) {
61 			continue;
62 		}
63 
64 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
65 			SPDK_DEBUGLOG(nvmf,
66 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
67 				      spdk_nvmf_subsystem_get_nqn(subsystem),
68 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
69 			return false;
70 		}
71 	}
72 
73 	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
74 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
75 	return true;
76 }
77 
78 bool
79 nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
80 {
81 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
82 }
83 
84 bool
85 nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
86 {
87 	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
88 }
89 
90 static void
91 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
92 			     void *cb_arg)
93 {
94 	struct spdk_nvmf_request	*req = cb_arg;
95 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
96 	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
97 	uint32_t			cdw0 = 0;
98 	struct spdk_nvmf_request	*first_req = req->first_fused_req;
99 
100 	if (spdk_unlikely(first_req != NULL)) {
101 		/* fused commands - get status for both operations */
102 		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;
103 
104 		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
105 		first_response->cdw0 = cdw0;
106 		first_response->status.sc = first_sc;
107 		first_response->status.sct = first_sct;
108 
109 		/* first request should be completed */
110 		spdk_nvmf_request_complete(first_req);
111 		req->first_fused_req = NULL;
112 	} else {
113 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
114 	}
115 
116 	response->cdw0 = cdw0;
117 	response->status.sc = sc;
118 	response->status.sct = sct;
119 
120 	spdk_nvmf_request_complete(req);
121 	spdk_bdev_free_io(bdev_io);
122 }
123 
124 static void
125 nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
126 				   void *cb_arg)
127 {
128 	struct spdk_nvmf_request *req = cb_arg;
129 
130 	if (req->cmd_cb_fn) {
131 		req->cmd_cb_fn(req);
132 	}
133 
134 	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
135 }
136 
137 void
138 nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
139 			    bool dif_insert_or_strip)
140 {
141 	struct spdk_bdev *bdev = ns->bdev;
142 	uint64_t num_blocks;
143 	uint32_t phys_blocklen;
144 
145 	num_blocks = spdk_bdev_get_num_blocks(bdev);
146 
147 	nsdata->nsze = num_blocks;
148 	nsdata->ncap = num_blocks;
149 	nsdata->nuse = num_blocks;
150 	nsdata->nlbaf = 0;
151 	nsdata->flbas.format = 0;
152 	nsdata->nacwu = spdk_bdev_get_acwu(bdev);
153 	if (!dif_insert_or_strip) {
154 		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
155 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
156 		if (nsdata->lbaf[0].ms != 0) {
157 			nsdata->flbas.extended = 1;
158 			nsdata->mc.extended = 1;
159 			nsdata->mc.pointer = 0;
160 			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
161 
162 			switch (spdk_bdev_get_dif_type(bdev)) {
163 			case SPDK_DIF_TYPE1:
164 				nsdata->dpc.pit1 = 1;
165 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
166 				break;
167 			case SPDK_DIF_TYPE2:
168 				nsdata->dpc.pit2 = 1;
169 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
170 				break;
171 			case SPDK_DIF_TYPE3:
172 				nsdata->dpc.pit3 = 1;
173 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
174 				break;
175 			default:
176 				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
177 				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
178 				break;
179 			}
180 		}
181 	} else {
182 		nsdata->lbaf[0].ms = 0;
183 		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
184 	}
185 
186 	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
187 	assert(phys_blocklen > 0);
188 	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
189 	nsdata->nsfeat.optperf = 1;
190 	nsdata->nsfeat.ns_atomic_write_unit = 1;
191 	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
192 	nsdata->nawupf = nsdata->npwg;
193 	nsdata->npwa = nsdata->npwg;
194 	nsdata->npdg = nsdata->npwg;
195 	nsdata->npda = nsdata->npwg;
196 
197 	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
198 	nsdata->nmic.can_share = 1;
199 	if (ns->ptpl_file != NULL) {
200 		nsdata->nsrescap.rescap.persist = 1;
201 	}
202 	nsdata->nsrescap.rescap.write_exclusive = 1;
203 	nsdata->nsrescap.rescap.exclusive_access = 1;
204 	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
205 	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
206 	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
207 	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
208 	nsdata->nsrescap.rescap.ignore_existing_key = 1;
209 
210 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
211 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
212 
213 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
214 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
215 }
216 
217 static void
218 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
219 			      uint64_t *num_blocks)
220 {
221 	/* SLBA: CDW10 and CDW11 */
222 	*start_lba = from_le64(&cmd->cdw10);
223 
224 	/* NLB: CDW12 bits 15:00, 0's based */
225 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
226 }
227 
/*
 * Check that the range [io_start_lba, io_start_lba + io_num_blocks) lies
 * within a device of bdev_num_blocks blocks.
 *
 * Returns false when the end of the range exceeds the device size or when
 * computing the end wraps around the 64-bit LBA space.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	uint64_t io_end_lba = io_start_lba + io_num_blocks;

	/* Unsigned wrap-around: the sum ended up below the start. */
	if (io_end_lba < io_start_lba) {
		return false;
	}

	return io_end_lba <= bdev_num_blocks;
}
239 
240 static void
241 nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
242 {
243 	struct spdk_nvmf_request *req = arg;
244 
245 	nvmf_ctrlr_process_io_cmd(req);
246 }
247 
248 static void
249 nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
250 {
251 	struct spdk_nvmf_request *req = arg;
252 
253 	nvmf_ctrlr_process_admin_cmd(req);
254 }
255 
256 static void
257 nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
258 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
259 {
260 	int rc;
261 
262 	req->bdev_io_wait.bdev = bdev;
263 	req->bdev_io_wait.cb_fn = cb_fn;
264 	req->bdev_io_wait.cb_arg = cb_arg;
265 
266 	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
267 	if (rc != 0) {
268 		assert(false);
269 	}
270 	req->qpair->group->stat.pending_bdev_io++;
271 }
272 
273 bool
274 nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
275 {
276 	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
277 }
278 
279 int
280 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
281 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
282 {
283 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
284 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
285 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
286 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
287 	uint64_t start_lba;
288 	uint64_t num_blocks;
289 	int rc;
290 
291 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
292 
293 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
294 		SPDK_ERRLOG("end of media\n");
295 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
296 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
297 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
298 	}
299 
300 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
301 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
302 			    num_blocks, block_size, req->length);
303 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
304 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
305 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
306 	}
307 
308 	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
309 		/* Return here after checking the lba etc */
310 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
311 	}
312 
313 	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));
314 
315 	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
316 				    nvmf_bdev_ctrlr_complete_cmd, req);
317 	if (spdk_unlikely(rc)) {
318 		if (rc == -ENOMEM) {
319 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
320 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
321 		}
322 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
323 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
324 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
325 	}
326 
327 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
328 }
329 
330 int
331 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
332 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
333 {
334 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
335 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
336 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
337 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
338 	uint64_t start_lba;
339 	uint64_t num_blocks;
340 	int rc;
341 
342 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
343 
344 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
345 		SPDK_ERRLOG("end of media\n");
346 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
347 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
348 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
349 	}
350 
351 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
352 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
353 			    num_blocks, block_size, req->length);
354 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
355 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
356 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
357 	}
358 
359 	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
360 		/* Return here after checking the lba etc */
361 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
362 	}
363 
364 	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));
365 
366 	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
367 				     nvmf_bdev_ctrlr_complete_cmd, req);
368 	if (spdk_unlikely(rc)) {
369 		if (rc == -ENOMEM) {
370 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
371 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
372 		}
373 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
374 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
375 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
376 	}
377 
378 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
379 }
380 
381 int
382 nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
383 			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
384 {
385 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
386 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
387 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
388 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
389 	uint64_t start_lba;
390 	uint64_t num_blocks;
391 	int rc;
392 
393 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
394 
395 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
396 		SPDK_ERRLOG("end of media\n");
397 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
398 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
399 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
400 	}
401 
402 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
403 		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
404 			    num_blocks, block_size, req->length);
405 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
406 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
407 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
408 	}
409 
410 	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
411 				       nvmf_bdev_ctrlr_complete_cmd, req);
412 	if (spdk_unlikely(rc)) {
413 		if (rc == -ENOMEM) {
414 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
415 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
416 		}
417 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
418 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
419 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
420 	}
421 
422 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
423 }
424 
425 int
426 nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
427 				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
428 {
429 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
430 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
431 	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
432 	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
433 	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
434 	uint64_t write_start_lba, cmp_start_lba;
435 	uint64_t write_num_blocks, cmp_num_blocks;
436 	int rc;
437 
438 	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
439 	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);
440 
441 	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
442 		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
443 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
444 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
445 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
446 	}
447 
448 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
449 			  write_num_blocks))) {
450 		SPDK_ERRLOG("end of media\n");
451 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
452 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
453 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
454 	}
455 
456 	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
457 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
458 			    write_num_blocks, block_size, write_req->length);
459 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
460 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
461 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
462 	}
463 
464 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
465 			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
466 	if (spdk_unlikely(rc)) {
467 		if (rc == -ENOMEM) {
468 			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
469 			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
470 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
471 		}
472 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
473 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
474 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
475 	}
476 
477 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
478 }
479 
480 int
481 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
482 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
483 {
484 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
485 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
486 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
487 	uint64_t start_lba;
488 	uint64_t num_blocks;
489 	int rc;
490 
491 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
492 
493 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
494 		SPDK_ERRLOG("end of media\n");
495 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
496 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
497 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
498 	}
499 
500 	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
501 					   nvmf_bdev_ctrlr_complete_cmd, req);
502 	if (spdk_unlikely(rc)) {
503 		if (rc == -ENOMEM) {
504 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
505 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
506 		}
507 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
508 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
509 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
510 	}
511 
512 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
513 }
514 
515 int
516 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
517 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
518 {
519 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
520 	int rc;
521 
522 	/* As for NVMeoF controller, SPDK always set volatile write
523 	 * cache bit to 1, return success for those block devices
524 	 * which can't support FLUSH command.
525 	 */
526 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
527 		response->status.sct = SPDK_NVME_SCT_GENERIC;
528 		response->status.sc = SPDK_NVME_SC_SUCCESS;
529 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
530 	}
531 
532 	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
533 				    nvmf_bdev_ctrlr_complete_cmd, req);
534 	if (spdk_unlikely(rc)) {
535 		if (rc == -ENOMEM) {
536 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
537 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
538 		}
539 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
540 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
541 	}
542 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
543 }
544 
/* Per-request context tracking the unmap operations of one Dataset Management command. */
struct nvmf_bdev_ctrlr_unmap {
	/* Request this context belongs to. */
	struct spdk_nvmf_request *req;
	/* Outstanding references: in-flight unmaps, plus one while the request is queued for retry. */
	uint32_t count;
	/* bdev descriptor, bdev and I/O channel saved for resubmission. */
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct spdk_io_channel *ch;
	/* Index of the next DSM range to submit. */
	uint32_t range_index;
};
553 
554 static void
555 nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
556 			  void *cb_arg)
557 {
558 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
559 	struct spdk_nvmf_request	*req = unmap_ctx->req;
560 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
561 	int				sc, sct;
562 	uint32_t			cdw0;
563 
564 	unmap_ctx->count--;
565 
566 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
567 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
568 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
569 		response->cdw0 = cdw0;
570 		response->status.sc = sc;
571 		response->status.sct = sct;
572 	}
573 
574 	if (unmap_ctx->count == 0) {
575 		spdk_nvmf_request_complete(req);
576 		free(unmap_ctx);
577 	}
578 	spdk_bdev_free_io(bdev_io);
579 }
580 
581 static int
582 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
583 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
584 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);
585 static void
586 nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
587 {
588 	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
589 	struct spdk_nvmf_request *req = unmap_ctx->req;
590 	struct spdk_bdev_desc *desc = unmap_ctx->desc;
591 	struct spdk_bdev *bdev = unmap_ctx->bdev;
592 	struct spdk_io_channel *ch = unmap_ctx->ch;
593 
594 	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
595 }
596 
597 static int
598 nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
599 		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
600 		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
601 {
602 	uint16_t nr, i;
603 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
604 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
605 	struct spdk_nvme_dsm_range *dsm_range;
606 	uint64_t lba;
607 	uint32_t lba_count;
608 	int rc;
609 
610 	nr = cmd->cdw10_bits.dsm.nr + 1;
611 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
612 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
613 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
614 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
615 	}
616 
617 	if (unmap_ctx == NULL) {
618 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
619 		if (!unmap_ctx) {
620 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
621 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
622 		}
623 
624 		unmap_ctx->req = req;
625 		unmap_ctx->desc = desc;
626 		unmap_ctx->ch = ch;
627 		unmap_ctx->bdev = bdev;
628 
629 		response->status.sct = SPDK_NVME_SCT_GENERIC;
630 		response->status.sc = SPDK_NVME_SC_SUCCESS;
631 	} else {
632 		unmap_ctx->count--;	/* dequeued */
633 	}
634 
635 	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
636 	for (i = unmap_ctx->range_index; i < nr; i++) {
637 		lba = dsm_range[i].starting_lba;
638 		lba_count = dsm_range[i].length;
639 
640 		unmap_ctx->count++;
641 
642 		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
643 					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
644 		if (rc) {
645 			if (rc == -ENOMEM) {
646 				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
647 				/* Unmap was not yet submitted to bdev */
648 				/* unmap_ctx->count will be decremented when the request is dequeued */
649 				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
650 			}
651 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
652 			unmap_ctx->count--;
653 			/* We can't return here - we may have to wait for any other
654 				* unmaps already sent to complete */
655 			break;
656 		}
657 		unmap_ctx->range_index++;
658 	}
659 
660 	if (unmap_ctx->count == 0) {
661 		free(unmap_ctx);
662 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
663 	}
664 
665 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
666 }
667 
668 int
669 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
670 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
671 {
672 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
673 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
674 
675 	if (cmd->cdw11_bits.dsm.ad) {
676 		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
677 	}
678 
679 	response->status.sct = SPDK_NVME_SCT_GENERIC;
680 	response->status.sc = SPDK_NVME_SC_SUCCESS;
681 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
682 }
683 
684 int
685 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
686 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
687 {
688 	int rc;
689 
690 	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
691 					nvmf_bdev_ctrlr_complete_cmd, req);
692 	if (spdk_unlikely(rc)) {
693 		if (rc == -ENOMEM) {
694 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
695 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
696 		}
697 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
698 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
699 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
700 	}
701 
702 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
703 }
704 
705 int
706 spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
707 		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
708 		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
709 {
710 	int rc;
711 
712 	req->cmd_cb_fn = cb_fn;
713 
714 	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
715 					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
716 	if (spdk_unlikely(rc)) {
717 		if (rc == -ENOMEM) {
718 			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
719 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
720 		}
721 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
722 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
723 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
724 	}
725 
726 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
727 }
728 
729 static void
730 nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
731 {
732 	struct spdk_nvmf_request *req = cb_arg;
733 
734 	if (success) {
735 		req->rsp->nvme_cpl.cdw0 &= ~1U;
736 	}
737 
738 	spdk_nvmf_request_complete(req);
739 	spdk_bdev_free_io(bdev_io);
740 }
741 
742 int
743 spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
744 			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
745 			       struct spdk_nvmf_request *req_to_abort)
746 {
747 	int rc;
748 
749 	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);
750 
751 	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
752 	if (spdk_likely(rc == 0)) {
753 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
754 	} else if (rc == -ENOMEM) {
755 		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
756 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
757 	} else {
758 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
759 	}
760 }
761 
762 bool
763 nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
764 			    struct spdk_dif_ctx *dif_ctx)
765 {
766 	uint32_t init_ref_tag, dif_check_flags = 0;
767 	int rc;
768 
769 	if (spdk_bdev_get_md_size(bdev) == 0) {
770 		return false;
771 	}
772 
773 	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
774 	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);
775 
776 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
777 		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
778 	}
779 
780 	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
781 		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
782 	}
783 
784 	rc = spdk_dif_ctx_init(dif_ctx,
785 			       spdk_bdev_get_block_size(bdev),
786 			       spdk_bdev_get_md_size(bdev),
787 			       spdk_bdev_is_md_interleaved(bdev),
788 			       spdk_bdev_is_dif_head_of_md(bdev),
789 			       spdk_bdev_get_dif_type(bdev),
790 			       dif_check_flags,
791 			       init_ref_tag, 0, 0, 0, 0);
792 
793 	return (rc == 0) ? true : false;
794 }
795 
796 static void
797 nvmf_bdev_ctrlr_start_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
798 				     void *cb_arg)
799 {
800 	struct spdk_nvmf_request	*req = cb_arg;
801 	struct iovec *iov;
802 	int iovcnt;
803 
804 	if (spdk_unlikely(!success)) {
805 		int                     sc = 0, sct = 0;
806 		uint32_t                cdw0 = 0;
807 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
808 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
809 
810 		response->cdw0 = cdw0;
811 		response->status.sc = sc;
812 		response->status.sct = sct;
813 
814 		spdk_bdev_free_io(bdev_io);
815 		spdk_nvmf_request_complete(req);
816 		return;
817 	}
818 
819 	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);
820 
821 	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
822 	assert(iovcnt > 0);
823 
824 	req->iovcnt = iovcnt;
825 
826 	assert(req->iov == iov);
827 
828 	/* backward compatible */
829 	req->data = req->iov[0].iov_base;
830 
831 	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */
832 
833 	spdk_nvmf_request_complete(req);
834 	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
835 }
836 
837 int
838 nvmf_bdev_ctrlr_start_zcopy(struct spdk_bdev *bdev,
839 			    struct spdk_bdev_desc *desc,
840 			    struct spdk_io_channel *ch,
841 			    struct spdk_nvmf_request *req)
842 {
843 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
844 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
845 	uint64_t start_lba;
846 	uint64_t num_blocks;
847 
848 	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);
849 
850 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
851 		SPDK_ERRLOG("end of media\n");
852 		return -ENXIO;
853 	}
854 
855 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
856 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
857 			    num_blocks, block_size, req->length);
858 		return -ENXIO;
859 	}
860 
861 	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;
862 
863 	return spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
864 				     num_blocks, populate, nvmf_bdev_ctrlr_start_zcopy_complete, req);
865 }
866 
867 static void
868 nvmf_bdev_ctrlr_end_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
869 				   void *cb_arg)
870 {
871 	struct spdk_nvmf_request	*req = cb_arg;
872 
873 	if (spdk_unlikely(!success)) {
874 		int                     sc = 0, sct = 0;
875 		uint32_t                cdw0 = 0;
876 		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
877 		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
878 
879 		response->cdw0 = cdw0;
880 		response->status.sc = sc;
881 		response->status.sct = sct;
882 	}
883 
884 	spdk_bdev_free_io(bdev_io);
885 	req->zcopy_bdev_io = NULL;
886 	spdk_nvmf_request_complete(req);
887 }
888 
889 int
890 nvmf_bdev_ctrlr_end_zcopy(struct spdk_nvmf_request *req, bool commit)
891 {
892 	return spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_end_zcopy_complete, req);
893 }
894