/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

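/*
 * Return true only if every namespace bdev in the subsystem supports the
 * given bdev I/O type. Namespaces without an attached bdev are skipped.
 */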
static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

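/*
 * Generic bdev I/O completion callback: translate the bdev I/O status into
 * an NVMe completion for the request. For a fused compare-and-write, the
 * status of both halves is retrieved and the first (compare) request is
 * completed here as well.
 */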
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* fused commands - get status for both operations */
		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* Complete the first request of the fused pair here. */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

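/*
 * Fill out an Identify Namespace data structure from the namespace's bdev
 * properties (size, LBA format, metadata/PI settings, atomic write and
 * reservation capabilities). When DIF insert/strip is enabled, the metadata
 * size is reported as 0 and the data block size is used instead.
 */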
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);
			/* The NVMf library does not process the PRACT and PRCHK flags,
			 * so use of the extended LBA buffer is left to users.
			 */
			nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* The Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

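/*
 * Extract the starting LBA (CDW10/CDW11) from a read/write-style command and
 * convert the 0's-based NLB field (CDW12) into an actual block count.
 */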
static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

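/*
 * Check that the I/O range fits within the bdev and that the LBA arithmetic
 * does not wrap around.
 */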
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_io_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_admin_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

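/*
 * Queue a request to be retried once the bdev layer has free I/O resources
 * (used when a bdev call returns -ENOMEM). The wait callback resubmits the
 * original command.
 */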
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

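/*
 * Handle an NVMe READ by translating it into spdk_bdev_readv_blocks(). The
 * LBA range and SGL length are validated first; -ENOMEM from the bdev layer
 * causes the request to be queued and retried later. The Write and Compare
 * handlers below follow the same pattern.
 */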
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

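/*
 * Handle a fused COMPARE + WRITE pair. Both commands must describe the same
 * LBA range; the pair is submitted as a single
 * spdk_bdev_comparev_and_writev_blocks() operation and completed through
 * nvmf_bdev_ctrlr_complete_cmd(), which also completes the compare request.
 */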
int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

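/*
 * Handle a FLUSH by flushing the entire bdev. Bdevs without flush support
 * report success, since the controller always advertises a volatile write
 * cache (see the comment in the function body).
 */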
int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* For an NVMe-oF controller, SPDK always sets the volatile write
	 * cache bit to 1, so return success for block devices that do not
	 * support the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

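/*
 * Context used to track a Dataset Management (deallocate) command that fans
 * out into one bdev unmap per DSM range. 'count' is the number of outstanding
 * unmaps; the NVMe request is completed when it reaches zero.
 */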
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

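/*
 * Submit one bdev unmap per DSM range. On -ENOMEM the remaining ranges are
 * queued for resubmission starting at range_index; any other error is
 * recorded in the completion status while already-submitted unmaps are
 * allowed to finish.
 */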
static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

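/*
 * Pass an admin command through to the bdev. An optional callback is invoked
 * from the completion path before the NVMe completion is posted. -ENOTSUP
 * from the bdev is reported as Invalid Opcode.
 */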
int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

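/*
 * Ask the bdev layer to abort an outstanding I/O. The caller is expected to
 * have preset CDW0 bit 0 of the ABORT completion to 1 (command not aborted);
 * the bit is cleared in the completion callback if the abort succeeds.
 */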
int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

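/*
 * Build a DIF context for a command against a bdev with metadata. The lower
 * 32 bits of the start LBA seed the initial reference tag; reftag and guard
 * checks are enabled according to the bdev's configuration. Returns false if
 * the bdev has no metadata or the context cannot be initialized.
 */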
bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0) ? true : false;
}

static void
nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt = 0;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	/* Keep req->data pointing at the first buffer for backward compatibility. */
	req->data = req->iov[0].iov_base;

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

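/*
 * Start a zero-copy I/O: after the usual range and length checks, ask the
 * bdev for buffers (populated with data for a READ). The resulting bdev_io
 * is kept in req->zcopy_bdev_io until nvmf_bdev_ctrlr_zcopy_end() commits or
 * releases it.
 */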
int
nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

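/*
 * Finish a zero-copy I/O that was started with nvmf_bdev_ctrlr_zcopy_start(),
 * committing the data to the bdev if 'commit' is true.
 */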
void
nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
{
	int rc __attribute__((unused));

	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);

	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
	assert(rc == 0);
}
878