xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 3630473789c359155f05075bea018c32d24032b3)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

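/* Return true only if every bdev-backed namespace in the subsystem supports the
 * given bdev I/O type; namespaces without a bdev attached are skipped.
 */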
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

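/* DSM (deallocate) and Write Zeroes support is determined per controller: each
 * is supported only if every bdev in the subsystem supports the matching bdev
 * I/O type.
 */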
bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

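/* Generic bdev I/O completion callback: translate the bdev I/O status into an
 * NVMe completion for the request. For a fused compare-and-write pair, both the
 * first (compare) and second (write) requests are completed here.
 */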
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, second_sc = 0, second_sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* fused commands - get status for both operations */
		struct spdk_nvme_cpl *fused_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &second_sct, &second_sc, &first_sct, &first_sc);
		fused_response->cdw0 = cdw0;
		fused_response->status.sc = second_sc;
		fused_response->status.sct = second_sct;

		/* The first request of the fused pair can now be completed. */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &first_sct, &first_sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = first_sc;
	response->status.sct = first_sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

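/* Fill out the Identify Namespace data structure for a namespace backed by a
 * bdev: capacity, LBA format, end-to-end protection settings, atomic write
 * parameters, reservation capabilities, and the NGUID/EUI64 identifiers.
 */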
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev);
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

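/* Extract the starting LBA (CDW10-11) and the number of logical blocks
 * (CDW12 bits 15:00, converted from 0's based to an actual count) from a
 * read/write-style NVMe command.
 */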
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

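/* Check that the I/O range fits within the bdev and that the LBA arithmetic
 * does not wrap around (overflow of start LBA + block count).
 */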
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_io_cmd(req);
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_admin_cmd(req);
}

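/* When a bdev submission fails with -ENOMEM, park the request on the bdev's
 * io_wait queue; the supplied callback resubmits the command once a bdev_io
 * becomes available again.
 */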
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

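/* Handle an NVMe Read: validate the LBA range and SGL length, then submit a
 * vectored read to the bdev. Write and Compare below follow the same pattern.
 */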
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

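/* Handle a fused Compare and Write pair. Both commands must target the same LBA
 * range; the pair is submitted to the bdev as a single compare-and-write
 * operation and both requests are completed from nvmf_bdev_ctrlr_complete_cmd().
 */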
int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

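/* Handle Write Zeroes: no data is transferred from the host, so only the LBA
 * range needs to be validated before submitting to the bdev.
 */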
int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* For the NVMe-oF controller, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices that do not
	 * support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

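/* Context for a Dataset Management (deallocate) command. One unmap is issued to
 * the bdev per range in the command; count tracks the unmaps still outstanding
 * and the NVMf request is completed when it reaches zero.
 */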
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

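/* Issue one bdev unmap per DSM range. On -ENOMEM the request is queued and the
 * loop resumes at range_index when it is resubmitted; on other errors the loop
 * stops, but unmaps that were already submitted are still waited for.
 */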
static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

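/* Handle Dataset Management. Only the Deallocate (AD) attribute triggers bdev
 * I/O; a DSM command without it completes successfully without touching the
 * bdev.
 */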
int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

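/* Pass an arbitrary NVMe I/O command straight through to an NVMe bdev. If the
 * submission fails for any reason other than -ENOMEM, the command completes
 * with Invalid Opcode.
 */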
int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

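/* Pass an admin command through to an NVMe bdev. The optional cb_fn is invoked
 * from the completion path (nvmf_bdev_ctrlr_complete_admin_cmd) before the
 * request itself is completed.
 */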
int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

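/* Abort handling. The caller pre-sets bit 0 of CDW0 ("command not aborted");
 * if the bdev layer reports that the command was aborted, the completion
 * callback clears that bit as required by the NVMe specification.
 */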
static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

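/* Build a DIF context for a command against a bdev with metadata. Returns false
 * when the bdev has no metadata (nothing to verify or insert); otherwise the
 * initial reference tag is taken from the start LBA and the enabled check flags
 * mirror the bdev's DIF configuration.
 */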
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0) ? true : false;
}