/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

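/* Return true only if every namespace in the subsystem is backed by a bdev
 * that supports the given bdev I/O type. Namespaces without a bdev are skipped.
 */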
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

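/* Generic bdev I/O completion callback: translate the bdev I/O status into an
 * NVMe completion for the request. For fused Compare and Write, both halves
 * are completed from the single compare-and-write bdev I/O.
 */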
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* fused commands - get status for both operations */
		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* first request should be completed */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

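/* Fill an Identify Namespace data structure from the properties of the backing
 * bdev. When DIF insert/strip is enabled by the transport, metadata is hidden
 * from the host, so the metadata size is reported as zero and the data block
 * size is used for the LBA format.
 */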
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev);
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

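/* Extract the starting LBA (CDW10-11) and the Number of Logical Blocks
 * (CDW12 bits 15:00, 0's based, converted here to a 1's based count) from a
 * read/write style command.
 */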
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_io_cmd(req);
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;

	nvmf_ctrlr_process_admin_cmd(req);
}

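/* Queue a request that failed with -ENOMEM until the bdev layer has a free
 * bdev_io, then resubmit it via cb_fn. spdk_bdev_queue_io_wait() should only
 * fail on programming errors, hence the assert.
 */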
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

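/* The read, write and compare handlers below share the same validation steps:
 * the LBA range must fit within the bdev, and the transfer length implied by
 * NLB must not exceed the SGL/buffer length of the request.
 */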
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
		/* Return here after validating the LBA range and SGL length */
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
		/* Return here after validating the LBA range and SGL length */
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_using_zcopy(req->zcopy_phase));

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

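/* Fused COMPARE + WRITE: both halves must address exactly the same LBA range.
 * The compare buffer and the write buffer are submitted together as a single
 * compare-and-write bdev I/O; completion of the write request also completes
 * the compare request (see nvmf_bdev_ctrlr_complete_cmd).
 */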
int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* For an NVMe-oF controller, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices that do not
	 * support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

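/* Context for a Dataset Management (deallocate) command that may fan out into
 * multiple bdev unmap operations. 'count' tracks outstanding unmaps; the NVMe
 * completion is sent once it drops to zero.
 */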
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

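/* Pass an admin command through to the bdev. The optional cb_fn is invoked
 * from the completion callback before the NVMe completion is sent, giving the
 * caller a chance to post-process the result.
 */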
int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

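/* Abort handling: cdw0 bit 0 of the Abort completion is expected to be set to
 * 1 ("command not aborted") before submission (see the assert below); it is
 * cleared here only if the bdev layer reports the target command was aborted.
 */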
static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

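/* Build a DIF context from the bdev's metadata and protection settings so the
 * transport can verify or generate protection information. Returns false for
 * bdevs without metadata or if the context cannot be initialized.
 */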
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0) ? true : false;
}

static void
nvmf_bdev_ctrlr_start_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt;

	if (spdk_unlikely(!success)) {
		int			sc = 0, sct = 0;
		uint32_t		cdw0 = 0;
		struct spdk_nvme_cpl	*response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	/* backward compatible */
	req->data = req->iov[0].iov_base;

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

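/* Start a zero-copy I/O: validate the LBA range and length exactly like the
 * regular read/write paths, then ask the bdev to provide buffers that map
 * directly onto req->iov (populated with data for reads). The request is
 * finished later via nvmf_bdev_ctrlr_end_zcopy().
 */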
int
nvmf_bdev_ctrlr_start_zcopy(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		return -ENXIO;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		return -ENXIO;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	return spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				     num_blocks, populate, nvmf_bdev_ctrlr_start_zcopy_complete, req);
}

static void
nvmf_bdev_ctrlr_end_zcopy_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int			sc = 0, sct = 0;
		uint32_t		cdw0 = 0;
		struct spdk_nvme_cpl	*response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

int
nvmf_bdev_ctrlr_end_zcopy(struct spdk_nvmf_request *req, bool commit)
{
	return spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_end_zcopy_complete, req);
}