/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "nvmf_internal.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_spec.h"
#include "spdk/trace.h"
#include "spdk/scsi_spec.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

static bool
nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
				      enum spdk_bdev_io_type io_type)
{
	struct spdk_nvmf_ns *ns;

	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
		if (ns->bdev == NULL) {
			continue;
		}

		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
			SPDK_DEBUGLOG(nvmf,
				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
				      spdk_nvmf_subsystem_get_nqn(subsystem),
				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
			return false;
		}
	}

	SPDK_DEBUGLOG(nvmf, "All devices in Subsystem %s support io_type %d\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
	return true;
}

bool
nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
}

bool
nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
{
	return nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
}

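/* Generic bdev I/O completion callback: translate the bdev's NVMe status into
 * the request's completion queue entry and complete the request. For fused
 * commands, the status of the first request is filled in and completed here
 * as well.
 */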
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
			     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				first_sc = 0, first_sct = 0, sc = 0, sct = 0;
	uint32_t			cdw0 = 0;
	struct spdk_nvmf_request	*first_req = req->first_fused_req;

	if (spdk_unlikely(first_req != NULL)) {
		/* fused commands - get status for both operations */
		struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;

		spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
		first_response->cdw0 = cdw0;
		first_response->status.sc = first_sc;
		first_response->status.sct = first_sct;

		/* Complete the first fused request here; the second is completed below. */
		spdk_nvmf_request_complete(first_req);
		req->first_fused_req = NULL;
	} else {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	}

	response->cdw0 = cdw0;
	response->status.sc = sc;
	response->status.sct = sct;

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

static void
nvmf_bdev_ctrlr_complete_admin_cmd(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (req->cmd_cb_fn) {
		req->cmd_cb_fn(req);
	}

	nvmf_bdev_ctrlr_complete_cmd(bdev_io, success, req);
}

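/* Fill in the Identify Namespace data structure based on the properties of the
 * backing bdev: capacity, LBA format, metadata/protection information, atomic
 * write parameters, and reservation capabilities.
 */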
void
nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
			    bool dif_insert_or_strip)
{
	struct spdk_bdev *bdev = ns->bdev;
	uint64_t num_blocks;
	uint32_t phys_blocklen;

	num_blocks = spdk_bdev_get_num_blocks(bdev);

	nsdata->nsze = num_blocks;
	nsdata->ncap = num_blocks;
	nsdata->nuse = num_blocks;
	nsdata->nlbaf = 0;
	nsdata->flbas.format = 0;
	nsdata->nacwu = spdk_bdev_get_acwu(bdev) - 1; /* nacwu is 0-based */
	if (!dif_insert_or_strip) {
		nsdata->lbaf[0].ms = spdk_bdev_get_md_size(bdev);
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
		if (nsdata->lbaf[0].ms != 0) {
			nsdata->flbas.extended = 1;
			nsdata->mc.extended = 1;
			nsdata->mc.pointer = 0;
			nsdata->dps.md_start = spdk_bdev_is_dif_head_of_md(bdev);

			switch (spdk_bdev_get_dif_type(bdev)) {
			case SPDK_DIF_TYPE1:
				nsdata->dpc.pit1 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE1;
				break;
			case SPDK_DIF_TYPE2:
				nsdata->dpc.pit2 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE2;
				break;
			case SPDK_DIF_TYPE3:
				nsdata->dpc.pit3 = 1;
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_TYPE3;
				break;
			default:
				SPDK_DEBUGLOG(nvmf, "Protection Disabled\n");
				nsdata->dps.pit = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE;
				break;
			}
		}
	} else {
		nsdata->lbaf[0].ms = 0;
		nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev));
	}

	phys_blocklen = spdk_bdev_get_physical_block_size(bdev);
	assert(phys_blocklen > 0);
	/* Linux driver uses min(nawupf, npwg) to set physical_block_size */
	nsdata->nsfeat.optperf = 1;
	nsdata->nsfeat.ns_atomic_write_unit = 1;
	nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1;
	nsdata->nawupf = nsdata->npwg;
	nsdata->npwa = nsdata->npwg;
	nsdata->npdg = nsdata->npwg;
	nsdata->npda = nsdata->npwg;

	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
	nsdata->nmic.can_share = 1;
	if (ns->ptpl_file != NULL) {
		nsdata->nsrescap.rescap.persist = 1;
	}
	nsdata->nsrescap.rescap.write_exclusive = 1;
	nsdata->nsrescap.rescap.exclusive_access = 1;
	nsdata->nsrescap.rescap.write_exclusive_reg_only = 1;
	nsdata->nsrescap.rescap.exclusive_access_reg_only = 1;
	nsdata->nsrescap.rescap.write_exclusive_all_reg = 1;
	nsdata->nsrescap.rescap.exclusive_access_all_reg = 1;
	nsdata->nsrescap.rescap.ignore_existing_key = 1;

	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));

	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
}

static void
nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
			      uint64_t *num_blocks)
{
	/* SLBA: CDW10 and CDW11 */
	*start_lba = from_le64(&cmd->cdw10);

	/* NLB: CDW12 bits 15:00, 0's based */
	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
}

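/* Return true if the I/O range fits entirely within the bdev; the second check
 * guards against unsigned wrap-around of start LBA + block count.
 */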
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	if (io_start_lba + io_num_blocks > bdev_num_blocks ||
	    io_start_lba + io_num_blocks < io_start_lba) {
		return false;
	}

	return true;
}

static void
nvmf_ctrlr_process_io_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_io_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

static void
nvmf_ctrlr_process_admin_cmd_resubmit(void *arg)
{
	struct spdk_nvmf_request *req = arg;
	int rc;

	rc = nvmf_ctrlr_process_admin_cmd(req);
	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		spdk_nvmf_request_complete(req);
	}
}

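/* Queue a request to be retried once the bdev has resources available again;
 * used when a submission fails with -ENOMEM.
 */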
static void
nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
{
	int rc;

	req->bdev_io_wait.bdev = bdev;
	req->bdev_io_wait.cb_fn = cb_fn;
	req->bdev_io_wait.cb_arg = cb_arg;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait);
	if (rc != 0) {
		assert(false);
	}
	req->qpair->group->stat.pending_bdev_io++;
}

bool
nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev)
{
	return spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
}

int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	assert(!spdk_nvmf_request_using_zcopy(req));

	rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				     nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Compare NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
				       nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				      struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	struct spdk_nvme_cmd *cmp_cmd = &cmp_req->cmd->nvme_cmd;
	struct spdk_nvme_cmd *write_cmd = &write_req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &write_req->rsp->nvme_cpl;
	uint64_t write_start_lba, cmp_start_lba;
	uint64_t write_num_blocks, cmp_num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmp_cmd, &cmp_start_lba, &cmp_num_blocks);
	nvmf_bdev_ctrlr_get_rw_params(write_cmd, &write_start_lba, &write_num_blocks);

	if (spdk_unlikely(write_start_lba != cmp_start_lba || write_num_blocks != cmp_num_blocks)) {
		SPDK_ERRLOG("Fused command start lba / num blocks mismatch\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, write_start_lba,
			  write_num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(write_num_blocks * block_size > write_req->length)) {
		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    write_num_blocks, block_size, write_req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_comparev_and_writev_blocks(desc, ch, cmp_req->iov, cmp_req->iovcnt, write_req->iov,
			write_req->iovcnt, write_start_lba, write_num_blocks, nvmf_bdev_ctrlr_complete_cmd, write_req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(cmp_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, cmp_req);
			nvmf_bdev_ctrl_queue_io(write_req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, write_req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
					   nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	int rc;

	/* As an NVMe-oF controller, SPDK always reports the volatile write cache
	 * as enabled, so return success for block devices that do not support
	 * the FLUSH command.
	 */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
				    nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

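/* Context for a Dataset Management (deallocate) command that is split into one
 * unmap per range; 'count' tracks the number of unmaps still outstanding.
 */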
struct nvmf_bdev_ctrlr_unmap {
	struct spdk_nvmf_request	*req;
	uint32_t			count;
	struct spdk_bdev_desc		*desc;
	struct spdk_bdev		*bdev;
	struct spdk_io_channel		*ch;
	uint32_t			range_index;
};

static void
nvmf_bdev_ctrlr_unmap_cpl(struct spdk_bdev_io *bdev_io, bool success,
			  void *cb_arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = cb_arg;
	struct spdk_nvmf_request	*req = unmap_ctx->req;
	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
	int				sc, sct;
	uint32_t			cdw0;

	unmap_ctx->count--;

	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	if (unmap_ctx->count == 0) {
		spdk_nvmf_request_complete(req);
		free(unmap_ctx);
	}
	spdk_bdev_free_io(bdev_io);
}

static int nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
				 struct nvmf_bdev_ctrlr_unmap *unmap_ctx);

static void
nvmf_bdev_ctrlr_unmap_resubmit(void *arg)
{
	struct nvmf_bdev_ctrlr_unmap *unmap_ctx = arg;
	struct spdk_nvmf_request *req = unmap_ctx->req;
	struct spdk_bdev_desc *desc = unmap_ctx->desc;
	struct spdk_bdev *bdev = unmap_ctx->bdev;
	struct spdk_io_channel *ch = unmap_ctx->ch;

	nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, unmap_ctx);
}

static int
nvmf_bdev_ctrlr_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		      struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		      struct nvmf_bdev_ctrlr_unmap *unmap_ctx)
{
	uint16_t nr, i;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
	struct spdk_nvme_dsm_range *dsm_range;
	uint64_t lba;
	uint32_t lba_count;
	int rc;

	nr = cmd->cdw10_bits.dsm.nr + 1;
	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (unmap_ctx == NULL) {
		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
		if (!unmap_ctx) {
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
		}

		unmap_ctx->req = req;
		unmap_ctx->desc = desc;
		unmap_ctx->ch = ch;
		unmap_ctx->bdev = bdev;

		response->status.sct = SPDK_NVME_SCT_GENERIC;
		response->status.sc = SPDK_NVME_SC_SUCCESS;
	} else {
		unmap_ctx->count--;	/* dequeued */
	}

	dsm_range = (struct spdk_nvme_dsm_range *)req->data;
	for (i = unmap_ctx->range_index; i < nr; i++) {
		lba = dsm_range[i].starting_lba;
		lba_count = dsm_range[i].length;

		unmap_ctx->count++;

		rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
					    nvmf_bdev_ctrlr_unmap_cpl, unmap_ctx);
		if (rc) {
			if (rc == -ENOMEM) {
				nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_unmap_resubmit, unmap_ctx);
				/* Unmap was not yet submitted to bdev */
				/* unmap_ctx->count will be decremented when the request is dequeued */
				return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
			}
			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			unmap_ctx->count--;
			/* We can't return here - we may have to wait for any other
			 * unmaps already sent to complete */
			break;
		}
		unmap_ctx->range_index++;
	}

	if (unmap_ctx->count == 0) {
		free(unmap_ctx);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;

	if (cmd->cdw11_bits.dsm.ad) {
		return nvmf_bdev_ctrlr_unmap(bdev, desc, ch, req, NULL);
	}

	response->status.sct = SPDK_NVME_SCT_GENERIC;
	response->status.sc = SPDK_NVME_SC_SUCCESS;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
	int rc;

	rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					nvmf_bdev_ctrlr_complete_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

int
spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
		spdk_nvmf_nvme_passthru_cmd_cb cb_fn)
{
	int rc;

	req->cmd_cb_fn = cb_fn;

	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
					   nvmf_bdev_ctrlr_complete_admin_cmd, req);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		if (rc == -ENOTSUP) {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
		} else {
			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}

		req->rsp->nvme_cpl.status.dnr = 1;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

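/* Completion callback for an Abort admin command: clearing bit 0 of CDW0
 * indicates that the targeted command was successfully aborted.
 */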
static void
nvmf_bdev_ctrlr_complete_abort_cmd(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_nvmf_request *req = cb_arg;

	if (success) {
		req->rsp->nvme_cpl.cdw0 &= ~1U;
	}

	spdk_nvmf_request_complete(req);
	spdk_bdev_free_io(bdev_io);
}

int
spdk_nvmf_bdev_ctrlr_abort_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch, struct spdk_nvmf_request *req,
			       struct spdk_nvmf_request *req_to_abort)
{
	int rc;

	assert((req->rsp->nvme_cpl.cdw0 & 1U) != 0);

	rc = spdk_bdev_abort(desc, ch, req_to_abort, nvmf_bdev_ctrlr_complete_abort_cmd, req);
	if (spdk_likely(rc == 0)) {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else if (rc == -ENOMEM) {
		nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_admin_cmd_resubmit, req);
		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
	} else {
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}
}

bool
nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
			    struct spdk_dif_ctx *dif_ctx)
{
	uint32_t init_ref_tag, dif_check_flags = 0;
	int rc;

	if (spdk_bdev_get_md_size(bdev) == 0) {
		return false;
	}

	/* Initial Reference Tag is the lower 32 bits of the start LBA. */
	init_ref_tag = (uint32_t)from_le64(&cmd->cdw10);

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	rc = spdk_dif_ctx_init(dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       dif_check_flags,
			       init_ref_tag, 0, 0, 0, 0);

	return (rc == 0) ? true : false;
}

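/* Completion for a zcopy start: on success, adopt the bdev-provided iovecs and
 * keep the bdev_io around until the zcopy end; on failure, propagate the NVMe
 * status and complete the request immediately.
 */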
static void
nvmf_bdev_ctrlr_zcopy_start_complete(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;
	struct iovec *iov;
	int iovcnt;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;

		spdk_bdev_free_io(bdev_io);
		spdk_nvmf_request_complete(req);
		return;
	}

	spdk_bdev_io_get_iovec(bdev_io, &iov, &iovcnt);

	assert(iovcnt <= NVMF_REQ_MAX_BUFFERS);
	assert(iovcnt > 0);

	req->iovcnt = iovcnt;

	assert(req->iov == iov);

	/* Keep req->data pointing at the first iovec for backward compatibility. */
	req->data = req->iov[0].iov_base;

	req->zcopy_bdev_io = bdev_io; /* Preserve the bdev_io for the end zcopy */

	spdk_nvmf_request_complete(req);
	/* Don't free the bdev_io here as it is needed for the END ZCOPY */
}

int
nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
			    struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
	uint32_t block_size = spdk_bdev_get_block_size(bdev);
	uint64_t start_lba;
	uint64_t num_blocks;
	int rc;

	nvmf_bdev_ctrlr_get_rw_params(&req->cmd->nvme_cmd, &start_lba, &num_blocks);

	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
		SPDK_ERRLOG("end of media\n");
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	if (spdk_unlikely(num_blocks * block_size > req->length)) {
		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
			    num_blocks, block_size, req->length);
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	bool populate = (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_READ) ? true : false;

	rc = spdk_bdev_zcopy_start(desc, ch, req->iov, req->iovcnt, start_lba,
				   num_blocks, populate, nvmf_bdev_ctrlr_zcopy_start_complete, req);
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
		}
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}

static void
nvmf_bdev_ctrlr_zcopy_end_complete(struct spdk_bdev_io *bdev_io, bool success,
				   void *cb_arg)
{
	struct spdk_nvmf_request	*req = cb_arg;

	if (spdk_unlikely(!success)) {
		int                     sc = 0, sct = 0;
		uint32_t                cdw0 = 0;
		struct spdk_nvme_cpl    *response = &req->rsp->nvme_cpl;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);

		response->cdw0 = cdw0;
		response->status.sc = sc;
		response->status.sct = sct;
	}

	spdk_bdev_free_io(bdev_io);
	req->zcopy_bdev_io = NULL;
	spdk_nvmf_request_complete(req);
}

void
nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit)
{
	int rc __attribute__((unused));

	rc = spdk_bdev_zcopy_end(req->zcopy_bdev_io, commit, nvmf_bdev_ctrlr_zcopy_end_complete, req);

	/* The only way spdk_bdev_zcopy_end() can fail is if we pass a bdev_io type that isn't ZCOPY */
	assert(rc == 0);
}
893