xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 1831b086d000520ee926f096ea8dbd629c80b437)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvmf_internal.h"
37 
38 #include "spdk/bdev.h"
39 #include "spdk/endian.h"
40 #include "spdk/io_channel.h"
41 #include "spdk/likely.h"
42 #include "spdk/nvme.h"
43 #include "spdk/nvmf_spec.h"
44 #include "spdk/trace.h"
45 #include "spdk/scsi_spec.h"
46 #include "spdk/string.h"
47 #include "spdk/util.h"
48 
49 #include "spdk_internal/log.h"
50 
51 static bool
52 spdk_nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
53 		enum spdk_bdev_io_type io_type)
54 {
55 	struct spdk_nvmf_ns *ns;
56 
57 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
58 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
59 		if (ns->bdev == NULL) {
60 			continue;
61 		}
62 
63 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
64 			SPDK_DEBUGLOG(SPDK_LOG_NVMF,
65 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
66 				      spdk_nvmf_subsystem_get_nqn(subsystem),
67 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
68 			return false;
69 		}
70 	}
71 
72 	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n",
73 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
74 	return true;
75 }
76 
77 bool
78 spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
79 {
80 	return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
81 }
82 
83 bool
84 spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
85 {
86 	return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
87 }
88 
89 static void
90 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
91 			     void *cb_arg)
92 {
93 	struct spdk_nvmf_request	*req = cb_arg;
94 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
95 	int				sc, sct;
96 
97 	spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 int
106 spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata)
107 {
108 	struct spdk_bdev *bdev = ns->bdev;
109 	uint64_t num_blocks;
110 
111 	num_blocks = spdk_bdev_get_num_blocks(bdev);
112 
113 	nsdata->nsze = num_blocks;
114 	nsdata->ncap = num_blocks;
115 	nsdata->nuse = num_blocks;
116 	nsdata->nlbaf = 0;
117 	nsdata->flbas.format = 0;
118 	nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
119 	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
120 
121 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
122 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
123 
124 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
125 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
126 
127 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
128 }
129 
130 static void
131 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
132 			      uint64_t *num_blocks)
133 {
134 	/* SLBA: CDW10 and CDW11 */
135 	*start_lba = from_le64(&cmd->cdw10);
136 
137 	/* NLB: CDW12 bits 15:00, 0's based */
138 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
139 }
140 
/*
 * Check that the block range [io_start_lba, io_start_lba + io_num_blocks)
 * lies entirely within a device of bdev_num_blocks blocks.
 *
 * Returns false if the end of the range wraps around the 64-bit space or
 * extends past the end of the device, true otherwise.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	const uint64_t io_end_lba = io_start_lba + io_num_blocks;

	/* Unsigned addition wrapped: the range cannot be valid. */
	if (io_end_lba < io_start_lba) {
		return false;
	}

	return io_end_lba <= bdev_num_blocks;
}
152 
153 static int
154 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
155 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
156 {
157 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
158 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
159 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
160 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
161 	uint64_t start_lba;
162 	uint64_t num_blocks;
163 
164 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
165 
166 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
167 		SPDK_ERRLOG("end of media\n");
168 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
169 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
170 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
171 	}
172 
173 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
174 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
175 			    num_blocks, block_size, req->length);
176 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
177 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
178 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
179 	}
180 
181 	spdk_trace_record(TRACE_NVMF_LIB_READ_START, 0, 0, (uint64_t)req, 0);
182 	if (spdk_unlikely(spdk_bdev_read_blocks(desc, ch, req->data, start_lba, num_blocks,
183 						nvmf_bdev_ctrlr_complete_cmd, req))) {
184 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
185 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
186 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
187 	}
188 
189 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
190 }
191 
192 static int
193 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
194 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
195 {
196 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
197 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
198 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
199 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
200 	uint64_t start_lba;
201 	uint64_t num_blocks;
202 
203 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
204 
205 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
206 		SPDK_ERRLOG("end of media\n");
207 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
208 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
209 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
210 	}
211 
212 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
213 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
214 			    num_blocks, block_size, req->length);
215 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
216 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
217 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
218 	}
219 
220 	spdk_trace_record(TRACE_NVMF_LIB_WRITE_START, 0, 0, (uint64_t)req, 0);
221 	if (spdk_unlikely(spdk_bdev_write_blocks(desc, ch, req->data, start_lba, num_blocks,
222 			  nvmf_bdev_ctrlr_complete_cmd, req))) {
223 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
224 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
225 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
226 	}
227 
228 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
229 }
230 
231 static int
232 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
233 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
234 {
235 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
236 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
237 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
238 	uint64_t start_lba;
239 	uint64_t num_blocks;
240 
241 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
242 
243 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
244 		SPDK_ERRLOG("end of media\n");
245 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
246 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
247 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
248 	}
249 
250 	spdk_trace_record(TRACE_NVMF_LIB_WRITE_START, 0, 0, (uint64_t)req, 0);
251 	if (spdk_unlikely(spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
252 			  nvmf_bdev_ctrlr_complete_cmd, req))) {
253 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
254 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
255 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
256 	}
257 
258 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
259 }
260 
261 static int
262 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
263 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
264 {
265 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
266 
267 	/* As for NVMeoF controller, SPDK always set volatile write
268 	 * cache bit to 1, return success for those block devices
269 	 * which can't support FLUSH command.
270 	 */
271 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
272 		response->status.sct = SPDK_NVME_SCT_GENERIC;
273 		response->status.sc = SPDK_NVME_SC_SUCCESS;
274 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
275 	}
276 
277 	if (spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
278 				   nvmf_bdev_ctrlr_complete_cmd, req)) {
279 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
280 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
281 	}
282 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
283 }
284 
/*
 * Bookkeeping for one Dataset Management (deallocate) command that is
 * carried out as several bdev unmap I/Os.
 */
struct nvmf_virtual_ctrlr_unmap {
	/* NVMe-oF request on whose behalf the unmaps were issued. */
	struct spdk_nvmf_request *req;
	/* Unmap I/Os submitted but not yet completed. */
	uint32_t count;
};
289 
290 static void
291 nvmf_virtual_ctrlr_dsm_cpl(struct spdk_bdev_io *bdev_io, bool success,
292 			   void *cb_arg)
293 {
294 	struct nvmf_virtual_ctrlr_unmap *unmap_ctx = cb_arg;
295 	struct spdk_nvmf_request	*req = unmap_ctx->req;
296 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
297 	int				sc, sct;
298 
299 	unmap_ctx->count--;
300 
301 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
302 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
303 		spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
304 		response->status.sc = sc;
305 		response->status.sct = sct;
306 	}
307 
308 	if (unmap_ctx->count == 0) {
309 		spdk_nvmf_request_complete(req);
310 		free(unmap_ctx);
311 	}
312 	spdk_bdev_free_io(bdev_io);
313 }
314 
315 static int
316 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
317 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
318 {
319 	uint32_t attribute;
320 	uint16_t nr, i;
321 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
322 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
323 
324 	nr = ((cmd->cdw10 & 0x000000ff) + 1);
325 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
326 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
327 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
328 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
329 	}
330 
331 	attribute = cmd->cdw11 & 0x00000007;
332 	if (attribute & SPDK_NVME_DSM_ATTR_DEALLOCATE) {
333 		struct nvmf_virtual_ctrlr_unmap *unmap_ctx;
334 		struct spdk_nvme_dsm_range *dsm_range;
335 		uint64_t lba;
336 		uint32_t lba_count;
337 
338 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
339 		if (!unmap_ctx) {
340 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
341 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
342 		}
343 
344 		unmap_ctx->req = req;
345 
346 		response->status.sct = SPDK_NVME_SCT_GENERIC;
347 		response->status.sc = SPDK_NVME_SC_SUCCESS;
348 
349 		dsm_range = (struct spdk_nvme_dsm_range *)req->data;
350 		for (i = 0; i < nr; i++) {
351 			lba = dsm_range[i].starting_lba;
352 			lba_count = dsm_range[i].length;
353 
354 			unmap_ctx->count++;
355 
356 			if (spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
357 						   nvmf_virtual_ctrlr_dsm_cpl, unmap_ctx)) {
358 				response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
359 				unmap_ctx->count--;
360 				/* We can't return here - we may have to wait for any other
361 				 * unmaps already sent to complete */
362 				break;
363 			}
364 		}
365 
366 		if (unmap_ctx->count == 0) {
367 			free(unmap_ctx);
368 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
369 		}
370 
371 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
372 	}
373 
374 	response->status.sct = SPDK_NVME_SCT_GENERIC;
375 	response->status.sc = SPDK_NVME_SC_SUCCESS;
376 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
377 }
378 
379 static int
380 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
381 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
382 {
383 	if (spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
384 				       nvmf_bdev_ctrlr_complete_cmd, req)) {
385 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
386 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
387 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
388 	}
389 
390 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
391 }
392 
393 int
394 spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
395 {
396 	uint32_t nsid;
397 	struct spdk_nvmf_ns *ns;
398 	struct spdk_bdev *bdev;
399 	struct spdk_bdev_desc *desc;
400 	struct spdk_io_channel *ch;
401 	struct spdk_nvmf_poll_group *group = req->qpair->group;
402 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
403 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
404 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
405 
406 	/* pre-set response details for this command */
407 	response->status.sc = SPDK_NVME_SC_SUCCESS;
408 	nsid = cmd->nsid;
409 
410 	if (spdk_unlikely(ctrlr == NULL)) {
411 		SPDK_ERRLOG("I/O command sent before CONNECT\n");
412 		response->status.sct = SPDK_NVME_SCT_GENERIC;
413 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
414 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
415 	}
416 
417 	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
418 		SPDK_ERRLOG("I/O command sent to disabled controller\n");
419 		response->status.sct = SPDK_NVME_SCT_GENERIC;
420 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
421 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
422 	}
423 
424 	ns = _spdk_nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
425 	if (ns == NULL || ns->bdev == NULL) {
426 		SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid);
427 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
428 		response->status.dnr = 1;
429 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
430 	}
431 
432 	bdev = ns->bdev;
433 	desc = ns->desc;
434 	ch = group->sgroups[ctrlr->subsys->id].channels[nsid - 1];
435 	switch (cmd->opc) {
436 	case SPDK_NVME_OPC_READ:
437 		return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
438 	case SPDK_NVME_OPC_WRITE:
439 		return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
440 	case SPDK_NVME_OPC_WRITE_ZEROES:
441 		return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
442 	case SPDK_NVME_OPC_FLUSH:
443 		return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
444 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
445 		return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
446 	default:
447 		return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
448 	}
449 }
450