xref: /spdk/lib/nvmf/ctrlr_bdev.c (revision 8a0a98d35e21f282088edf28b9e8da66ec390e3a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvmf_internal.h"
37 
38 #include "spdk/bdev.h"
39 #include "spdk/endian.h"
40 #include "spdk/thread.h"
41 #include "spdk/likely.h"
42 #include "spdk/nvme.h"
43 #include "spdk/nvmf_spec.h"
44 #include "spdk/trace.h"
45 #include "spdk/scsi_spec.h"
46 #include "spdk/string.h"
47 #include "spdk/util.h"
48 
49 #include "spdk_internal/log.h"
50 
51 static bool
52 spdk_nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem,
53 		enum spdk_bdev_io_type io_type)
54 {
55 	struct spdk_nvmf_ns *ns;
56 
57 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
58 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
59 		if (ns->bdev == NULL) {
60 			continue;
61 		}
62 
63 		if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) {
64 			SPDK_DEBUGLOG(SPDK_LOG_NVMF,
65 				      "Subsystem %s namespace %u (%s) does not support io_type %d\n",
66 				      spdk_nvmf_subsystem_get_nqn(subsystem),
67 				      ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type);
68 			return false;
69 		}
70 	}
71 
72 	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n",
73 		      spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type);
74 	return true;
75 }
76 
77 bool
78 spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr)
79 {
80 	return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP);
81 }
82 
83 bool
84 spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr)
85 {
86 	return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES);
87 }
88 
89 static void
90 nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
91 			     void *cb_arg)
92 {
93 	struct spdk_nvmf_request	*req = cb_arg;
94 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
95 	int				sc, sct;
96 
97 	spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
98 	response->status.sc = sc;
99 	response->status.sct = sct;
100 
101 	spdk_nvmf_request_complete(req);
102 	spdk_bdev_free_io(bdev_io);
103 }
104 
105 int
106 spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata)
107 {
108 	struct spdk_bdev *bdev = ns->bdev;
109 	uint64_t num_blocks;
110 
111 	num_blocks = spdk_bdev_get_num_blocks(bdev);
112 
113 	nsdata->nsze = num_blocks;
114 	nsdata->ncap = num_blocks;
115 	nsdata->nuse = num_blocks;
116 	nsdata->nlbaf = 0;
117 	nsdata->flbas.format = 0;
118 	nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev));
119 	nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev);
120 	nsdata->nmic.can_share = 1;
121 
122 	SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch");
123 	memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid));
124 
125 	SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch");
126 	memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64));
127 
128 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
129 }
130 
131 static void
132 nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba,
133 			      uint64_t *num_blocks)
134 {
135 	/* SLBA: CDW10 and CDW11 */
136 	*start_lba = from_le64(&cmd->cdw10);
137 
138 	/* NLB: CDW12 bits 15:00, 0's based */
139 	*num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1;
140 }
141 
/*
 * Check that the block range [io_start_lba, io_start_lba + io_num_blocks)
 * lies within a device of bdev_num_blocks blocks.
 *
 * Returns false when the end of the range exceeds the device size or when
 * computing the end wraps around the 64-bit range; true otherwise.
 */
static bool
nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba,
			     uint64_t io_num_blocks)
{
	const uint64_t io_end_lba = io_start_lba + io_num_blocks;

	/* Unsigned wrap-around: the sum came out smaller than its first operand. */
	if (io_end_lba < io_start_lba) {
		return false;
	}

	return io_end_lba <= bdev_num_blocks;
}
153 
154 static int
155 nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
156 			 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
157 {
158 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
159 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
160 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
161 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
162 	uint64_t start_lba;
163 	uint64_t num_blocks;
164 
165 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
166 
167 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
168 		SPDK_ERRLOG("end of media\n");
169 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
170 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
171 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
172 	}
173 
174 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
175 		SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
176 			    num_blocks, block_size, req->length);
177 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
178 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
179 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
180 	}
181 
182 	if (spdk_unlikely(spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
183 			  nvmf_bdev_ctrlr_complete_cmd, req))) {
184 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
185 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
186 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
187 	}
188 
189 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
190 }
191 
192 static int
193 nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
194 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
195 {
196 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
197 	uint32_t block_size = spdk_bdev_get_block_size(bdev);
198 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
199 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
200 	uint64_t start_lba;
201 	uint64_t num_blocks;
202 
203 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
204 
205 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
206 		SPDK_ERRLOG("end of media\n");
207 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
208 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
209 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
210 	}
211 
212 	if (spdk_unlikely(num_blocks * block_size > req->length)) {
213 		SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
214 			    num_blocks, block_size, req->length);
215 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
216 		rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
217 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
218 	}
219 
220 	if (spdk_unlikely(spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
221 			  nvmf_bdev_ctrlr_complete_cmd, req))) {
222 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
223 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
224 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
225 	}
226 
227 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
228 }
229 
230 static int
231 nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
232 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
233 {
234 	uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
235 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
236 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
237 	uint64_t start_lba;
238 	uint64_t num_blocks;
239 
240 	nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
241 
242 	if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
243 		SPDK_ERRLOG("end of media\n");
244 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
245 		rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
246 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
247 	}
248 
249 	if (spdk_unlikely(spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks,
250 			  nvmf_bdev_ctrlr_complete_cmd, req))) {
251 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
252 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
253 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
254 	}
255 
256 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
257 }
258 
259 static int
260 nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
261 			  struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
262 {
263 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
264 
265 	/* As for NVMeoF controller, SPDK always set volatile write
266 	 * cache bit to 1, return success for those block devices
267 	 * which can't support FLUSH command.
268 	 */
269 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
270 		response->status.sct = SPDK_NVME_SCT_GENERIC;
271 		response->status.sc = SPDK_NVME_SC_SUCCESS;
272 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
273 	}
274 
275 	if (spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev),
276 				   nvmf_bdev_ctrlr_complete_cmd, req)) {
277 		response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
278 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
279 	}
280 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
281 }
282 
/*
 * Context shared by all unmap I/Os issued for one Dataset Management
 * (deallocate) request.  It is heap-allocated by the DSM command handler and
 * freed once no unmap I/O is outstanding.
 */
struct nvmf_virtual_ctrlr_unmap {
	/* NVMe-oF request the unmaps were issued on behalf of. */
	struct spdk_nvmf_request	*req;
	/* Unmap I/Os submitted but not yet completed; the request is completed when this drops to 0. */
	uint32_t			count;
};
287 
288 static void
289 nvmf_virtual_ctrlr_dsm_cpl(struct spdk_bdev_io *bdev_io, bool success,
290 			   void *cb_arg)
291 {
292 	struct nvmf_virtual_ctrlr_unmap *unmap_ctx = cb_arg;
293 	struct spdk_nvmf_request	*req = unmap_ctx->req;
294 	struct spdk_nvme_cpl		*response = &req->rsp->nvme_cpl;
295 	int				sc, sct;
296 
297 	unmap_ctx->count--;
298 
299 	if (response->status.sct == SPDK_NVME_SCT_GENERIC &&
300 	    response->status.sc == SPDK_NVME_SC_SUCCESS) {
301 		spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
302 		response->status.sc = sc;
303 		response->status.sct = sct;
304 	}
305 
306 	if (unmap_ctx->count == 0) {
307 		spdk_nvmf_request_complete(req);
308 		free(unmap_ctx);
309 	}
310 	spdk_bdev_free_io(bdev_io);
311 }
312 
313 static int
314 nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
315 			struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
316 {
317 	uint32_t attribute;
318 	uint16_t nr, i;
319 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
320 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
321 
322 	nr = ((cmd->cdw10 & 0x000000ff) + 1);
323 	if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) {
324 		SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n");
325 		response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
326 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
327 	}
328 
329 	attribute = cmd->cdw11 & 0x00000007;
330 	if (attribute & SPDK_NVME_DSM_ATTR_DEALLOCATE) {
331 		struct nvmf_virtual_ctrlr_unmap *unmap_ctx;
332 		struct spdk_nvme_dsm_range *dsm_range;
333 		uint64_t lba;
334 		uint32_t lba_count;
335 
336 		unmap_ctx = calloc(1, sizeof(*unmap_ctx));
337 		if (!unmap_ctx) {
338 			response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
339 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
340 		}
341 
342 		unmap_ctx->req = req;
343 
344 		response->status.sct = SPDK_NVME_SCT_GENERIC;
345 		response->status.sc = SPDK_NVME_SC_SUCCESS;
346 
347 		dsm_range = (struct spdk_nvme_dsm_range *)req->data;
348 		for (i = 0; i < nr; i++) {
349 			lba = dsm_range[i].starting_lba;
350 			lba_count = dsm_range[i].length;
351 
352 			unmap_ctx->count++;
353 
354 			if (spdk_bdev_unmap_blocks(desc, ch, lba, lba_count,
355 						   nvmf_virtual_ctrlr_dsm_cpl, unmap_ctx)) {
356 				response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
357 				unmap_ctx->count--;
358 				/* We can't return here - we may have to wait for any other
359 				 * unmaps already sent to complete */
360 				break;
361 			}
362 		}
363 
364 		if (unmap_ctx->count == 0) {
365 			free(unmap_ctx);
366 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
367 		}
368 
369 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
370 	}
371 
372 	response->status.sct = SPDK_NVME_SCT_GENERIC;
373 	response->status.sc = SPDK_NVME_SC_SUCCESS;
374 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
375 }
376 
377 static int
378 nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
379 				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
380 {
381 	if (spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length,
382 				       nvmf_bdev_ctrlr_complete_cmd, req)) {
383 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
384 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
385 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
386 	}
387 
388 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
389 }
390 
391 int
392 spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
393 {
394 	uint32_t nsid;
395 	struct spdk_nvmf_ns *ns;
396 	struct spdk_bdev *bdev;
397 	struct spdk_bdev_desc *desc;
398 	struct spdk_io_channel *ch;
399 	struct spdk_nvmf_poll_group *group = req->qpair->group;
400 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
401 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
402 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
403 
404 	/* pre-set response details for this command */
405 	response->status.sc = SPDK_NVME_SC_SUCCESS;
406 	nsid = cmd->nsid;
407 
408 	if (spdk_unlikely(ctrlr == NULL)) {
409 		SPDK_ERRLOG("I/O command sent before CONNECT\n");
410 		response->status.sct = SPDK_NVME_SCT_GENERIC;
411 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
412 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
413 	}
414 
415 	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
416 		SPDK_ERRLOG("I/O command sent to disabled controller\n");
417 		response->status.sct = SPDK_NVME_SCT_GENERIC;
418 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
419 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
420 	}
421 
422 	ns = _spdk_nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
423 	if (ns == NULL || ns->bdev == NULL) {
424 		SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid);
425 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
426 		response->status.dnr = 1;
427 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
428 	}
429 
430 	bdev = ns->bdev;
431 	desc = ns->desc;
432 	ch = group->sgroups[ctrlr->subsys->id].channels[nsid - 1];
433 	switch (cmd->opc) {
434 	case SPDK_NVME_OPC_READ:
435 		return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
436 	case SPDK_NVME_OPC_WRITE:
437 		return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
438 	case SPDK_NVME_OPC_WRITE_ZEROES:
439 		return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
440 	case SPDK_NVME_OPC_FLUSH:
441 		return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
442 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
443 		return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
444 	default:
445 		return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
446 	}
447 }
448