/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

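/* Return true only if every namespace bdev in the subsystem supports the given bdev I/O type. */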
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

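/*
 * Generic bdev I/O completion callback: translate the bdev's NVMe status into
 * the NVMe-oF completion and complete the request. For a fused compare-and-write
 * pair, both requests are completed here.
 */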
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, second_sc = 0, second_sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* Fused commands - get status for both operations. */
		struct spdk_nvme_cpl *fused_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &second_sct, &second_sc, &first_sct, &first_sc);
		fused_response->cdw0 = cdw0;
		fused_response->status.sc = second_sc;
		fused_response->status.sct = second_sct;

		/* Complete the first request of the fused pair. */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &first_sct, &first_sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = first_sc;
	response->status.sct = first_sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

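/*
 * Admin passthrough completion: run the caller-supplied callback, if any,
 * before the generic completion path.
 */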
static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

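/* Fill the Identify Namespace data structure from the backing bdev's properties. */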
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev);
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

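/* Decode the starting LBA and the number of blocks from an NVMe read/write/compare command. */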
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

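/* Check that the I/O range fits within the bdev and does not wrap the 64-bit LBA space. */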
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_io_cmd(req);
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_admin_cmd(req);
}

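/*
 * Queue the request until the bdev has free I/O resources (the -ENOMEM path);
 * cb_fn resubmits the command once resources are available.
 */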
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
		/* The zcopy start/end path handles the data transfer;
		 * only the LBA and length checks above are needed here.
		 */
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
		/* The zcopy start/end path handles the data transfer;
		 * only the LBA and length checks above are needed here.
		 */
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* For NVMe-oF controllers, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices that do not
	 * support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

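/*
 * Context for a Dataset Management deallocate: tracks the number of outstanding
 * unmaps and the index of the next range to submit.
 */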
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

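/*
 * Submit one bdev unmap per Dataset Management range; the NVMe command is
 * completed once all outstanding unmaps have finished.
 */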
static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* The unmap was not yet submitted to the bdev;
				 * unmap_ctx->count will be decremented when the request is dequeued.
				 */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete.
			 */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

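/*
 * Dataset Management: only the Deallocate (AD) attribute is acted on; other
 * attributes are hints and the command completes immediately with success.
 */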
int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

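/*
 * Abort completion: clearing bit 0 of CDW0 indicates that the command was
 * successfully aborted.
 */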
static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

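/*
 * Build a DIF context from the bdev's metadata and protection information
 * settings; returns false if the bdev carries no metadata.
 */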
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0);
}

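/*
 * zcopy start completion: on success, adopt the iovec provided by the bdev and
 * keep the bdev_io around until the end-zcopy step.
 */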
static void
nvmf_bdev_ctrlr_start_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	/* Point req->data at the first iovec's buffer for backward compatibility. */
	req->data = req->iov[0].iov_base;

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

int
nvmf_bdev_ctrlr_start_zcopy(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		return -ENXIO;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		return -ENXIO;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ);

	return spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				     num_blocks, populate, nvmf_bdev_ctrlr_start_zcopy_complete, req);
}

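/*
 * zcopy end completion: propagate any NVMe error status, drop the preserved
 * bdev_io and complete the request.
 */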
static void
nvmf_bdev_ctrlr_end_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

int
nvmf_bdev_ctrlr_end_zcopy(struct spdk_nvmf_request *req, bool commit)
{
	return spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_end_zcopy_complete, req);
}