xref: /spdk/lib/nvmf/ctrlr.c (revision ba20950a539d0b71a20f8a1199cbf759de92e854)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 #include "transport.h"
11 
12 #include "spdk/bdev.h"
13 #include "spdk/bdev_zone.h"
14 #include "spdk/bit_array.h"
15 #include "spdk/endian.h"
16 #include "spdk/thread.h"
17 #include "spdk/nvme_spec.h"
18 #include "spdk/nvmf_cmd.h"
19 #include "spdk/string.h"
20 #include "spdk/util.h"
21 #include "spdk/version.h"
22 #include "spdk/log.h"
23 #include "spdk_internal/usdt.h"
24 
/* Keep alive timeouts are rounded up to a multiple of this many milliseconds. */
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000
/* KATO applied to a discovery controller when the host supplies a KATO of 0. */
#define NVMF_DISC_KATO_IN_MS 120000
/* The KAS field is expressed in units of 100 milliseconds. */
#define KAS_TIME_UNIT_IN_MS 100
/* Default KAS value: the minimum keep alive timeout expressed in KAS units. */
#define KAS_DEFAULT_VALUE (MIN_KEEP_ALIVE_TIMEOUT_IN_MS / KAS_TIME_UNIT_IN_MS)

/* Timeout, in milliseconds, for a CC reset/shutdown. */
#define NVMF_CC_RESET_SHN_TIMEOUT_IN_MS	10000

/* Controller-level reset/shutdown timeout: the CC timeout plus 5 seconds of margin. */
#define NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS	(NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + 5000)

/*
 * The firmware revision (FR) reported to the host is derived from the SPDK version.
 * FR holds only 8 bytes, which is too short for SPDK_VERSION_STRING, so only the
 * major, minor and patch strings are concatenated.
 */
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING

/* ANA transition time, in seconds. */
#define ANA_TRANSITION_TIME_IN_SEC 10

/* Abort command limit. */
#define NVMF_ABORT_COMMAND_LIMIT 3
43 
44 /*
45  * Support for custom admin command handlers
46  */
47 struct spdk_nvmf_custom_admin_cmd {
48 	spdk_nvmf_custom_cmd_hdlr hdlr;
49 	uint32_t nsid; /* nsid to forward */
50 };
51 
52 static struct spdk_nvmf_custom_admin_cmd g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_MAX_OPC + 1];
53 
54 static void _nvmf_request_complete(void *ctx);
55 
56 static inline void
57 nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
58 			      uint8_t iattr, uint16_t ipo)
59 {
60 	rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
61 	rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
62 	rsp->status_code_specific.invalid.iattr = iattr;
63 	rsp->status_code_specific.invalid.ipo = ipo;
64 }
65 
/* Report `field` of the Connect command as invalid (IATTR = 0, IPO = offset of field). */
#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field) \
	nvmf_invalid_connect_response(rsp, 0, \
				      offsetof(struct spdk_nvmf_fabric_connect_cmd, field))

/* Report `field` of the Connect data as invalid (IATTR = 1, IPO = offset of field). */
#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \
	nvmf_invalid_connect_response(rsp, 1, \
				      offsetof(struct spdk_nvmf_fabric_connect_data, field))
70 
71 
72 static void
73 nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
74 {
75 	if (!ctrlr) {
76 		SPDK_ERRLOG("Controller is NULL\n");
77 		return;
78 	}
79 
80 	if (ctrlr->keep_alive_poller == NULL) {
81 		return;
82 	}
83 
84 	SPDK_DEBUGLOG(nvmf, "Stop keep alive poller\n");
85 	spdk_poller_unregister(&ctrlr->keep_alive_poller);
86 }
87 
88 static void
89 nvmf_ctrlr_stop_association_timer(struct spdk_nvmf_ctrlr *ctrlr)
90 {
91 	if (!ctrlr) {
92 		SPDK_ERRLOG("Controller is NULL\n");
93 		assert(false);
94 		return;
95 	}
96 
97 	if (ctrlr->association_timer == NULL) {
98 		return;
99 	}
100 
101 	SPDK_DEBUGLOG(nvmf, "Stop association timer\n");
102 	spdk_poller_unregister(&ctrlr->association_timer);
103 }
104 
105 static void
106 nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
107 {
108 	if (status == 0) {
109 		SPDK_DEBUGLOG(nvmf, "ctrlr disconnect qpairs complete successfully\n");
110 	} else {
111 		SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
112 	}
113 }
114 
115 static int
116 _nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i, bool include_admin)
117 {
118 	int rc = 0;
119 	struct spdk_nvmf_ctrlr *ctrlr;
120 	struct spdk_nvmf_qpair *qpair, *temp_qpair;
121 	struct spdk_io_channel *ch;
122 	struct spdk_nvmf_poll_group *group;
123 
124 	ctrlr = spdk_io_channel_iter_get_ctx(i);
125 	ch = spdk_io_channel_iter_get_channel(i);
126 	group = spdk_io_channel_get_ctx(ch);
127 
128 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
129 		if (qpair->ctrlr == ctrlr && (include_admin || !nvmf_qpair_is_admin_queue(qpair))) {
130 			rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
131 			if (rc) {
132 				SPDK_ERRLOG("Qpair disconnect failed\n");
133 				return rc;
134 			}
135 		}
136 	}
137 
138 	return rc;
139 }
140 
/* Per-channel step: disconnect every qpair of the controller, admin queue included. */
static void
nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, true);

	spdk_for_each_channel_continue(i, rc);
}
146 
/* Per-channel step: disconnect only the I/O qpairs of the controller. */
static void
nvmf_ctrlr_disconnect_io_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, false);

	spdk_for_each_channel_continue(i, rc);
}
152 
153 static int
154 nvmf_ctrlr_keep_alive_poll(void *ctx)
155 {
156 	uint64_t keep_alive_timeout_tick;
157 	uint64_t now = spdk_get_ticks();
158 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
159 
160 	if (ctrlr->in_destruct) {
161 		nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
162 		return SPDK_POLLER_IDLE;
163 	}
164 
165 	SPDK_DEBUGLOG(nvmf, "Polling ctrlr keep alive timeout\n");
166 
167 	/* If the Keep alive feature is in use and the timer expires */
168 	keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
169 				  ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
170 	if (now > keep_alive_timeout_tick) {
171 		SPDK_NOTICELOG("Disconnecting host %s from subsystem %s due to keep alive timeout.\n",
172 			       ctrlr->hostnqn, ctrlr->subsys->subnqn);
173 		/* set the Controller Fatal Status bit to '1' */
174 		if (ctrlr->vcprop.csts.bits.cfs == 0) {
175 			nvmf_ctrlr_set_fatal_status(ctrlr);
176 
177 			/*
178 			 * disconnect qpairs, terminate Transport connection
179 			 * destroy ctrlr, break the host to controller association
180 			 * disconnect qpairs with qpair->ctrlr == ctrlr
181 			 */
182 			spdk_for_each_channel(ctrlr->subsys->tgt,
183 					      nvmf_ctrlr_disconnect_qpairs_on_pg,
184 					      ctrlr,
185 					      nvmf_ctrlr_disconnect_qpairs_done);
186 			return SPDK_POLLER_BUSY;
187 		}
188 	}
189 
190 	return SPDK_POLLER_IDLE;
191 }
192 
193 static void
194 nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
195 {
196 	if (!ctrlr) {
197 		SPDK_ERRLOG("Controller is NULL\n");
198 		return;
199 	}
200 
201 	/* if cleared to 0 then the Keep Alive Timer is disabled */
202 	if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
203 
204 		ctrlr->last_keep_alive_tick = spdk_get_ticks();
205 
206 		SPDK_DEBUGLOG(nvmf, "Ctrlr add keep alive poller\n");
207 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
208 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
209 	}
210 }
211 
212 static void
213 ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
214 			       struct spdk_nvmf_ctrlr *ctrlr,
215 			       struct spdk_nvmf_fabric_connect_rsp *rsp)
216 {
217 	assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());
218 
219 	/* check if we would exceed ctrlr connection limit */
220 	if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) {
221 		SPDK_ERRLOG("Requested QID %u but Max QID is %u\n",
222 			    qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1);
223 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
224 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
225 		return;
226 	}
227 
228 	if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
229 		SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
230 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
231 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
232 		return;
233 	}
234 
235 	qpair->ctrlr = ctrlr;
236 	spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid);
237 
238 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
239 	rsp->status_code_specific.success.cntlid = ctrlr->cntlid;
240 	SPDK_DEBUGLOG(nvmf, "connect capsule response: cntlid = 0x%04x\n",
241 		      rsp->status_code_specific.success.cntlid);
242 
243 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_qpair, qpair, qpair->qid, ctrlr->subsys->subnqn,
244 			   ctrlr->hostnqn);
245 }
246 
247 static void
248 _nvmf_ctrlr_add_admin_qpair(void *ctx)
249 {
250 	struct spdk_nvmf_request *req = ctx;
251 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
252 	struct spdk_nvmf_qpair *qpair = req->qpair;
253 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
254 
255 	ctrlr->admin_qpair = qpair;
256 	ctrlr->association_timeout = qpair->transport->opts.association_timeout;
257 	nvmf_ctrlr_start_keep_alive_timer(ctrlr);
258 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
259 	_nvmf_request_complete(req);
260 }
261 
262 static void
263 _nvmf_subsystem_add_ctrlr(void *ctx)
264 {
265 	struct spdk_nvmf_request *req = ctx;
266 	struct spdk_nvmf_qpair *qpair = req->qpair;
267 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
268 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
269 
270 	if (nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) {
271 		SPDK_ERRLOG("Unable to add controller to subsystem\n");
272 		spdk_bit_array_free(&ctrlr->qpair_mask);
273 		free(ctrlr);
274 		qpair->ctrlr = NULL;
275 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
276 		spdk_nvmf_request_complete(req);
277 		return;
278 	}
279 
280 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_admin_qpair, req);
281 }
282 
283 static void
284 nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
285 		      struct spdk_nvmf_ctrlr_data *cdata)
286 {
287 	cdata->aerl = SPDK_NVMF_MAX_ASYNC_EVENTS - 1;
288 	cdata->kas = KAS_DEFAULT_VALUE;
289 	cdata->vid = SPDK_PCI_VID_INTEL;
290 	cdata->ssvid = SPDK_PCI_VID_INTEL;
291 	/* INTEL OUI */
292 	cdata->ieee[0] = 0xe4;
293 	cdata->ieee[1] = 0xd2;
294 	cdata->ieee[2] = 0x5c;
295 	cdata->oncs.compare = 1;
296 	cdata->oncs.reservations = 1;
297 	cdata->fuses.compare_and_write = 1;
298 	cdata->sgls.supported = 1;
299 	cdata->sgls.keyed_sgl = 1;
300 	cdata->sgls.sgl_offset = 1;
301 	cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
302 	cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16;
303 	cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16;
304 	cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */
305 	cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC;
306 	cdata->nvmf_specific.msdbd = 1;
307 
308 	if (transport->ops->cdata_init) {
309 		transport->ops->cdata_init(transport, subsystem, cdata);
310 	}
311 }
312 
313 static bool
314 nvmf_subsys_has_multi_iocs(struct spdk_nvmf_subsystem *subsystem)
315 {
316 	struct spdk_nvmf_ns *ns;
317 	uint32_t i;
318 
319 	for (i = 0; i < subsystem->max_nsid; i++) {
320 		ns = subsystem->ns[i];
321 		if (ns && ns->bdev && spdk_bdev_is_zoned(ns->bdev)) {
322 			return true;
323 		}
324 	}
325 	return false;
326 }
327 
328 static struct spdk_nvmf_ctrlr *
329 nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
330 		  struct spdk_nvmf_request *req,
331 		  struct spdk_nvmf_fabric_connect_cmd *connect_cmd,
332 		  struct spdk_nvmf_fabric_connect_data *connect_data)
333 {
334 	struct spdk_nvmf_ctrlr *ctrlr;
335 	struct spdk_nvmf_transport *transport = req->qpair->transport;
336 	struct spdk_nvme_transport_id listen_trid = {};
337 	bool subsys_has_multi_iocs = false;
338 
339 	ctrlr = calloc(1, sizeof(*ctrlr));
340 	if (ctrlr == NULL) {
341 		SPDK_ERRLOG("Memory allocation failed\n");
342 		return NULL;
343 	}
344 
345 	if (spdk_nvme_trtype_is_fabrics(transport->ops->type)) {
346 		ctrlr->dynamic_ctrlr = true;
347 	} else {
348 		ctrlr->cntlid = connect_data->cntlid;
349 	}
350 
351 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_create, ctrlr, subsystem->subnqn,
352 			   spdk_thread_get_id(req->qpair->group->thread));
353 
354 	STAILQ_INIT(&ctrlr->async_events);
355 	TAILQ_INIT(&ctrlr->log_head);
356 	ctrlr->subsys = subsystem;
357 	ctrlr->thread = req->qpair->group->thread;
358 	ctrlr->disconnect_in_progress = false;
359 
360 	ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr);
361 	if (!ctrlr->qpair_mask) {
362 		SPDK_ERRLOG("Failed to allocate controller qpair mask\n");
363 		goto err_qpair_mask;
364 	}
365 
366 	nvmf_ctrlr_cdata_init(transport, subsystem, &ctrlr->cdata);
367 
368 	/*
369 	 * KAS: This field indicates the granularity of the Keep Alive Timer in 100ms units.
370 	 * If this field is cleared to 0h, then Keep Alive is not supported.
371 	 */
372 	if (ctrlr->cdata.kas) {
373 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
374 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
375 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
376 	}
377 
378 	ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
379 	if (ctrlr->subsys->flags.ana_reporting) {
380 		ctrlr->feat.async_event_configuration.bits.ana_change_notice = 1;
381 	}
382 	ctrlr->feat.volatile_write_cache.bits.wce = 1;
383 	/* Coalescing Disable */
384 	ctrlr->feat.interrupt_vector_configuration.bits.cd = 1;
385 
386 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
387 		/*
388 		 * If keep-alive timeout is not set, discovery controllers use some
389 		 * arbitrary high value in order to cleanup stale discovery sessions
390 		 *
391 		 * From the 1.0a nvme-of spec:
392 		 * "The Keep Alive command is reserved for
393 		 * Discovery controllers. A transport may specify a
394 		 * fixed Discovery controller activity timeout value
395 		 * (e.g., 2 minutes). If no commands are received
396 		 * by a Discovery controller within that time
397 		 * period, the controller may perform the
398 		 * actions for Keep Alive Timer expiration".
399 		 *
400 		 * From the 1.1 nvme-of spec:
401 		 * "A host requests an explicit persistent connection
402 		 * to a Discovery controller and Asynchronous Event Notifications from
403 		 * the Discovery controller on that persistent connection by specifying
404 		 * a non-zero Keep Alive Timer value in the Connect command."
405 		 *
406 		 * In case non-zero KATO is used, we enable discovery_log_change_notice
407 		 * otherwise we disable it and use default discovery controller KATO.
408 		 * KATO is in millisecond.
409 		 */
410 		if (ctrlr->feat.keep_alive_timer.bits.kato == 0) {
411 			ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
412 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 0;
413 		} else {
414 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 1;
415 		}
416 	}
417 
418 	/* Subtract 1 for admin queue, 1 for 0's based */
419 	ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
420 			1;
421 	ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 -
422 			1;
423 
424 	spdk_uuid_copy(&ctrlr->hostid, (struct spdk_uuid *)connect_data->hostid);
425 	memcpy(ctrlr->hostnqn, connect_data->hostnqn, sizeof(ctrlr->hostnqn));
426 
427 	ctrlr->vcprop.cap.raw = 0;
428 	ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */
429 	ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth -
430 				      1; /* max queue depth */
431 	ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */
432 	/* ready timeout - 500 msec units */
433 	ctrlr->vcprop.cap.bits.to = NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS / 500;
434 	ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */
435 	subsys_has_multi_iocs = nvmf_subsys_has_multi_iocs(subsystem);
436 	if (subsys_has_multi_iocs) {
437 		ctrlr->vcprop.cap.bits.css =
438 			SPDK_NVME_CAP_CSS_IOCS; /* One or more I/O command sets supported */
439 	} else {
440 		ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */
441 	}
442 
443 	ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */
444 	ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */
445 
446 	/* Version Supported: 1.3 */
447 	ctrlr->vcprop.vs.bits.mjr = 1;
448 	ctrlr->vcprop.vs.bits.mnr = 3;
449 	ctrlr->vcprop.vs.bits.ter = 0;
450 
451 	ctrlr->vcprop.cc.raw = 0;
452 	ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */
453 	if (subsys_has_multi_iocs) {
454 		ctrlr->vcprop.cc.bits.css =
455 			SPDK_NVME_CC_CSS_IOCS; /* All supported I/O Command Sets */
456 	}
457 
458 	ctrlr->vcprop.csts.raw = 0;
459 	ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */
460 
461 	SPDK_DEBUGLOG(nvmf, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw);
462 	SPDK_DEBUGLOG(nvmf, "vs 0x%x\n", ctrlr->vcprop.vs.raw);
463 	SPDK_DEBUGLOG(nvmf, "cc 0x%x\n", ctrlr->vcprop.cc.raw);
464 	SPDK_DEBUGLOG(nvmf, "csts 0x%x\n", ctrlr->vcprop.csts.raw);
465 
466 	ctrlr->dif_insert_or_strip = transport->opts.dif_insert_or_strip;
467 
468 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_NVME) {
469 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
470 			SPDK_ERRLOG("Could not get listener transport ID\n");
471 			goto err_listener;
472 		}
473 
474 		ctrlr->listener = nvmf_subsystem_find_listener(ctrlr->subsys, &listen_trid);
475 		if (!ctrlr->listener) {
476 			SPDK_ERRLOG("Listener was not found\n");
477 			goto err_listener;
478 		}
479 	}
480 
481 	req->qpair->ctrlr = ctrlr;
482 	spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_add_ctrlr, req);
483 
484 	return ctrlr;
485 err_listener:
486 	spdk_bit_array_free(&ctrlr->qpair_mask);
487 err_qpair_mask:
488 	free(ctrlr);
489 	return NULL;
490 }
491 
492 static void
493 _nvmf_ctrlr_destruct(void *ctx)
494 {
495 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
496 	struct spdk_nvmf_reservation_log *log, *log_tmp;
497 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
498 
499 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_destruct, ctrlr, ctrlr->subsys->subnqn,
500 			   spdk_thread_get_id(ctrlr->thread));
501 
502 	assert(spdk_get_thread() == ctrlr->thread);
503 	assert(ctrlr->in_destruct);
504 
505 	SPDK_DEBUGLOG(nvmf, "Destroy ctrlr 0x%hx\n", ctrlr->cntlid);
506 	if (ctrlr->disconnect_in_progress) {
507 		SPDK_ERRLOG("freeing ctrlr with disconnect in progress\n");
508 		spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
509 		return;
510 	}
511 
512 	nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
513 	nvmf_ctrlr_stop_association_timer(ctrlr);
514 	spdk_bit_array_free(&ctrlr->qpair_mask);
515 
516 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
517 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
518 		free(log);
519 	}
520 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
521 		STAILQ_REMOVE(&ctrlr->async_events, event, spdk_nvmf_async_event_completion, link);
522 		free(event);
523 	}
524 	free(ctrlr);
525 }
526 
527 void
528 nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
529 {
530 	nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);
531 
532 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
533 }
534 
535 static void
536 nvmf_ctrlr_add_io_qpair(void *ctx)
537 {
538 	struct spdk_nvmf_request *req = ctx;
539 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
540 	struct spdk_nvmf_qpair *qpair = req->qpair;
541 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
542 	struct spdk_nvmf_qpair *admin_qpair = ctrlr->admin_qpair;
543 
544 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_io_qpair, ctrlr, req->qpair, req->qpair->qid,
545 			   spdk_thread_get_id(ctrlr->thread));
546 
547 	/* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect.
548 	  * For error case, the value should be NULL. So set it to NULL at first.
549 	  */
550 	qpair->ctrlr = NULL;
551 
552 	/* Make sure the controller is not being destroyed. */
553 	if (ctrlr->in_destruct) {
554 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
555 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
556 		goto end;
557 	}
558 
559 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
560 		SPDK_ERRLOG("I/O connect not allowed on discovery controller\n");
561 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
562 		goto end;
563 	}
564 
565 	if (!ctrlr->vcprop.cc.bits.en) {
566 		SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n");
567 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
568 		goto end;
569 	}
570 
571 	if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) {
572 		SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n",
573 			    ctrlr->vcprop.cc.bits.iosqes);
574 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
575 		goto end;
576 	}
577 
578 	if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) {
579 		SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n",
580 			    ctrlr->vcprop.cc.bits.iocqes);
581 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
582 		goto end;
583 	}
584 
585 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
586 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
587 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
588 		 * state to DEACTIVATING and removing it from poll group */
589 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
590 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
591 		goto end;
592 	}
593 
594 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
595 end:
596 	spdk_nvmf_request_complete(req);
597 }
598 
599 static void
600 _nvmf_ctrlr_add_io_qpair(void *ctx)
601 {
602 	struct spdk_nvmf_request *req = ctx;
603 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
604 	struct spdk_nvmf_fabric_connect_data *data;
605 	struct spdk_nvmf_ctrlr *ctrlr;
606 	struct spdk_nvmf_qpair *qpair = req->qpair;
607 	struct spdk_nvmf_qpair *admin_qpair;
608 	struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
609 	struct spdk_nvmf_subsystem *subsystem;
610 	struct spdk_nvme_transport_id listen_trid = {};
611 	const struct spdk_nvmf_subsystem_listener *listener;
612 
613 	assert(req->iovcnt == 1);
614 
615 	data = req->iov[0].iov_base;
616 
617 	SPDK_DEBUGLOG(nvmf, "Connect I/O Queue for controller id 0x%x\n", data->cntlid);
618 
619 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
620 	/* We already checked this in spdk_nvmf_ctrlr_connect */
621 	assert(subsystem != NULL);
622 
623 	ctrlr = nvmf_subsystem_get_ctrlr(subsystem, data->cntlid);
624 	if (ctrlr == NULL) {
625 		SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid);
626 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
627 		spdk_nvmf_request_complete(req);
628 		return;
629 	}
630 
631 	/* fail before passing a message to the controller thread. */
632 	if (ctrlr->in_destruct) {
633 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
634 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
635 		spdk_nvmf_request_complete(req);
636 		return;
637 	}
638 
639 	/* If ANA reporting is enabled, check if I/O connect is on the same listener. */
640 	if (subsystem->flags.ana_reporting) {
641 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
642 			SPDK_ERRLOG("Could not get listener transport ID\n");
643 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
644 			spdk_nvmf_request_complete(req);
645 			return;
646 		}
647 
648 		listener = nvmf_subsystem_find_listener(subsystem, &listen_trid);
649 		if (listener != ctrlr->listener) {
650 			SPDK_ERRLOG("I/O connect is on a listener different from admin connect\n");
651 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
652 			spdk_nvmf_request_complete(req);
653 			return;
654 		}
655 	}
656 
657 	admin_qpair = ctrlr->admin_qpair;
658 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
659 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
660 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
661 		 * state to DEACTIVATING and removing it from poll group */
662 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
663 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
664 		spdk_nvmf_request_complete(req);
665 		return;
666 	}
667 	qpair->ctrlr = ctrlr;
668 	spdk_thread_send_msg(admin_qpair->group->thread, nvmf_ctrlr_add_io_qpair, req);
669 }
670 
671 static bool
672 nvmf_qpair_access_allowed(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_subsystem *subsystem,
673 			  const char *hostnqn)
674 {
675 	struct spdk_nvme_transport_id listen_trid = {};
676 
677 	if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
678 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subsystem->subnqn, hostnqn);
679 		return false;
680 	}
681 
682 	if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) {
683 		SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n",
684 			    subsystem->subnqn);
685 		return false;
686 	}
687 
688 	if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) {
689 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n",
690 			    subsystem->subnqn, hostnqn);
691 		return false;
692 	}
693 
694 	return true;
695 }
696 
697 static int
698 _nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
699 {
700 	struct spdk_nvmf_fabric_connect_data *data = req->iov[0].iov_base;
701 	struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd;
702 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
703 	struct spdk_nvmf_qpair *qpair = req->qpair;
704 	struct spdk_nvmf_transport *transport = qpair->transport;
705 	struct spdk_nvmf_ctrlr *ctrlr;
706 	struct spdk_nvmf_subsystem *subsystem;
707 
708 	SPDK_DEBUGLOG(nvmf, "recfmt 0x%x qid %u sqsize %u\n",
709 		      cmd->recfmt, cmd->qid, cmd->sqsize);
710 
711 	SPDK_DEBUGLOG(nvmf, "Connect data:\n");
712 	SPDK_DEBUGLOG(nvmf, "  cntlid:  0x%04x\n", data->cntlid);
713 	SPDK_DEBUGLOG(nvmf, "  hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n",
714 		      ntohl(*(uint32_t *)&data->hostid[0]),
715 		      ntohs(*(uint16_t *)&data->hostid[4]),
716 		      ntohs(*(uint16_t *)&data->hostid[6]),
717 		      data->hostid[8],
718 		      data->hostid[9],
719 		      ntohs(*(uint16_t *)&data->hostid[10]),
720 		      ntohl(*(uint32_t *)&data->hostid[12]));
721 	SPDK_DEBUGLOG(nvmf, "  subnqn: \"%s\"\n", data->subnqn);
722 	SPDK_DEBUGLOG(nvmf, "  hostnqn: \"%s\"\n", data->hostnqn);
723 
724 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
725 	if (!subsystem) {
726 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
727 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
728 	}
729 
730 	if (cmd->recfmt != 0) {
731 		SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt);
732 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
733 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT;
734 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
735 	}
736 
737 	/*
738 	 * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and
739 	 * strictly less than max_aq_depth (admin queues) or max_queue_depth (io queues).
740 	 */
741 	if (cmd->sqsize == 0) {
742 		SPDK_ERRLOG("Invalid SQSIZE = 0\n");
743 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
744 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
745 	}
746 
747 	if (cmd->qid == 0) {
748 		if (cmd->sqsize >= transport->opts.max_aq_depth) {
749 			SPDK_ERRLOG("Invalid SQSIZE for admin queue %u (min 1, max %u)\n",
750 				    cmd->sqsize, transport->opts.max_aq_depth - 1);
751 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
752 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
753 		}
754 	} else if (cmd->sqsize >= transport->opts.max_queue_depth) {
755 		SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n",
756 			    cmd->sqsize, transport->opts.max_queue_depth - 1);
757 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
758 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
759 	}
760 
761 	qpair->sq_head_max = cmd->sqsize;
762 	qpair->qid = cmd->qid;
763 	qpair->connect_received = true;
764 
765 	pthread_mutex_lock(&qpair->group->mutex);
766 	qpair->group->current_unassociated_qpairs--;
767 	pthread_mutex_unlock(&qpair->group->mutex);
768 
769 	if (0 == qpair->qid) {
770 		qpair->group->stat.admin_qpairs++;
771 		qpair->group->stat.current_admin_qpairs++;
772 	} else {
773 		qpair->group->stat.io_qpairs++;
774 		qpair->group->stat.current_io_qpairs++;
775 	}
776 
777 	if (cmd->qid == 0) {
778 		SPDK_DEBUGLOG(nvmf, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid);
779 
780 		if (spdk_nvme_trtype_is_fabrics(transport->ops->type) && data->cntlid != 0xFFFF) {
781 			/* This NVMf target only supports dynamic mode. */
782 			SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid);
783 			SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
784 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
785 		}
786 
787 		/* Establish a new ctrlr */
788 		ctrlr = nvmf_ctrlr_create(subsystem, req, cmd, data);
789 		if (!ctrlr) {
790 			SPDK_ERRLOG("nvmf_ctrlr_create() failed\n");
791 			rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
792 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
793 		} else {
794 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
795 		}
796 	} else {
797 		spdk_thread_send_msg(subsystem->thread, _nvmf_ctrlr_add_io_qpair, req);
798 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
799 	}
800 }
801 
802 static inline bool
803 nvmf_request_is_fabric_connect(struct spdk_nvmf_request *req)
804 {
805 	return req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
806 	       req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT;
807 }
808 
809 static struct spdk_nvmf_subsystem_poll_group *
810 nvmf_subsystem_pg_from_connect_cmd(struct spdk_nvmf_request *req)
811 {
812 	struct spdk_nvmf_fabric_connect_data *data;
813 	struct spdk_nvmf_subsystem *subsystem;
814 	struct spdk_nvmf_tgt *tgt;
815 
816 	assert(nvmf_request_is_fabric_connect(req));
817 	assert(req->qpair->ctrlr == NULL);
818 	assert(req->iovcnt == 1);
819 
820 	data = req->iov[0].iov_base;
821 	tgt = req->qpair->transport->tgt;
822 
823 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
824 	if (subsystem == NULL) {
825 		return NULL;
826 	}
827 
828 	return &req->qpair->group->sgroups[subsystem->id];
829 }
830 
831 int
832 spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
833 {
834 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
835 	struct spdk_nvmf_subsystem_poll_group *sgroup;
836 	struct spdk_nvmf_qpair *qpair = req->qpair;
837 	enum spdk_nvmf_request_exec_status status;
838 
839 	if (req->iovcnt > 1) {
840 		SPDK_ERRLOG("Connect command invalid iovcnt: %d\n", req->iovcnt);
841 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
842 		status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
843 		goto out;
844 	}
845 
846 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
847 	if (!sgroup) {
848 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
849 		status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
850 		goto out;
851 	}
852 
853 	sgroup->mgmt_io_outstanding++;
854 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
855 
856 	status = _nvmf_ctrlr_connect(req);
857 
858 out:
859 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
860 		_nvmf_request_complete(req);
861 	}
862 
863 	return status;
864 }
865 
866 static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req);
867 
868 static int
869 retry_connect(void *arg)
870 {
871 	struct spdk_nvmf_request *req = arg;
872 	struct spdk_nvmf_subsystem_poll_group *sgroup;
873 	int rc;
874 
875 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
876 	assert(sgroup != NULL);
877 	sgroup->mgmt_io_outstanding++;
878 	spdk_poller_unregister(&req->poller);
879 	rc = nvmf_ctrlr_cmd_connect(req);
880 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
881 		_nvmf_request_complete(req);
882 	}
883 	return SPDK_POLLER_BUSY;
884 }
885 
886 static int
887 nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
888 {
889 	struct spdk_nvmf_fabric_connect_data *data = req->iov[0].iov_base;
890 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
891 	struct spdk_nvmf_transport *transport = req->qpair->transport;
892 	struct spdk_nvmf_subsystem *subsystem;
893 
894 	if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) {
895 		SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length);
896 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
897 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
898 	}
899 
900 	if (req->iovcnt > 1) {
901 		SPDK_ERRLOG("Connect command invalid iovcnt: %d\n", req->iovcnt);
902 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
903 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
904 	}
905 
906 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
907 	if (!subsystem) {
908 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
909 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
910 	}
911 
912 	if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
913 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
914 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
915 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
916 		struct spdk_nvmf_subsystem_poll_group *sgroup;
917 
918 		if (req->timeout_tsc == 0) {
919 			/* We will only retry the request up to 1 second. */
920 			req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz();
921 		} else if (spdk_get_ticks() > req->timeout_tsc) {
922 			SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn);
923 			rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
924 			rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
925 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
926 		}
927 
928 		/* Subsystem is not ready to handle a connect. Use a poller to retry it
929 		 * again later. Decrement the mgmt_io_outstanding to avoid the
930 		 * subsystem waiting for this command to complete before unpausing.
931 		 */
932 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
933 		assert(sgroup != NULL);
934 		sgroup->mgmt_io_outstanding--;
935 		SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn);
936 		req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100);
937 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
938 	}
939 
940 	/* Ensure that hostnqn is null terminated */
941 	if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
942 		SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
943 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn);
944 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
945 	}
946 
947 	if (!nvmf_qpair_access_allowed(req->qpair, subsystem, data->hostnqn)) {
948 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
949 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
950 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
951 	}
952 
953 	return _nvmf_ctrlr_connect(req);
954 }
955 
956 static int
957 nvmf_ctrlr_association_remove(void *ctx)
958 {
959 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
960 	int rc;
961 
962 	nvmf_ctrlr_stop_association_timer(ctrlr);
963 
964 	if (ctrlr->in_destruct) {
965 		return SPDK_POLLER_IDLE;
966 	}
967 	SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n",
968 		      ctrlr->subsys->subnqn);
969 
970 	if (ctrlr->admin_qpair) {
971 		rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
972 		if (rc < 0) {
973 			SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
974 			assert(false);
975 		}
976 	}
977 
978 	return SPDK_POLLER_BUSY;
979 }
980 
981 static int
982 _nvmf_ctrlr_cc_reset_shn_done(void *ctx)
983 {
984 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
985 	uint64_t now = spdk_get_ticks();
986 	uint32_t count;
987 
988 	if (ctrlr->cc_timer) {
989 		spdk_poller_unregister(&ctrlr->cc_timer);
990 	}
991 
992 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
993 	SPDK_DEBUGLOG(nvmf, "ctrlr %p active queue count %u\n", ctrlr, count);
994 
995 	if (count > 1) {
996 		if (now < ctrlr->cc_timeout_tsc) {
997 			/* restart cc timer */
998 			ctrlr->cc_timer = SPDK_POLLER_REGISTER(_nvmf_ctrlr_cc_reset_shn_done, ctrlr, 100 * 1000);
999 			return SPDK_POLLER_IDLE;
1000 		} else {
1001 			/* controller fatal status */
1002 			SPDK_WARNLOG("IO timeout, ctrlr %p is in fatal status\n", ctrlr);
1003 			nvmf_ctrlr_set_fatal_status(ctrlr);
1004 		}
1005 	}
1006 
1007 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
1008 
1009 	if (ctrlr->disconnect_is_shn) {
1010 		ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE;
1011 		ctrlr->disconnect_is_shn = false;
1012 	} else {
1013 		/* Only a subset of the registers are cleared out on a reset */
1014 		ctrlr->vcprop.cc.raw = 0;
1015 		ctrlr->vcprop.csts.raw = 0;
1016 	}
1017 
1018 	/* After CC.EN transitions to 0 (due to shutdown or reset), the association
1019 	 * between the host and controller shall be preserved for at least 2 minutes */
1020 	if (ctrlr->association_timer) {
1021 		SPDK_DEBUGLOG(nvmf, "Association timer already set\n");
1022 		nvmf_ctrlr_stop_association_timer(ctrlr);
1023 	}
1024 	if (ctrlr->association_timeout) {
1025 		ctrlr->association_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_association_remove, ctrlr,
1026 					   ctrlr->association_timeout * 1000);
1027 	}
1028 	ctrlr->disconnect_in_progress = false;
1029 	return SPDK_POLLER_BUSY;
1030 }
1031 
/*
 * Completion callback of the spdk_for_each_channel() iteration that
 * disconnects the controller's I/O qpairs. A negative iteration status is
 * logged and asserted on; the reset/shutdown is then finalized.
 */
static void
nvmf_ctrlr_cc_reset_shn_done(struct spdk_io_channel_iter *i, int status)
{
	void *ctx = spdk_io_channel_iter_get_ctx(i);

	if (status < 0) {
		SPDK_ERRLOG("Fail to disconnect io ctrlr qpairs\n");
		assert(false);
	}

	_nvmf_ctrlr_cc_reset_shn_done(ctx);
}
1044 
/*
 * Completion callback for the bdev resets issued on a CC timeout: logs the
 * outcome and releases the bdev I/O. cb_arg is unused.
 */
static void
nvmf_bdev_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	const char *outcome = success ? "success" : "failure";

	SPDK_NOTICELOG("Resetting bdev done with %s\n", outcome);

	spdk_bdev_free_io(bdev_io);
}
1052 
1053 
1054 static int
1055 nvmf_ctrlr_cc_timeout(void *ctx)
1056 {
1057 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
1058 	struct spdk_nvmf_poll_group *group = ctrlr->admin_qpair->group;
1059 	struct spdk_nvmf_ns *ns;
1060 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1061 
1062 	assert(group != NULL && group->sgroups != NULL);
1063 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
1064 	SPDK_DEBUGLOG(nvmf, "Ctrlr %p reset or shutdown timeout\n", ctrlr);
1065 
1066 	for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
1067 	     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1068 		if (ns->bdev == NULL) {
1069 			continue;
1070 		}
1071 		ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[ns->opts.nsid - 1];
1072 		SPDK_NOTICELOG("Ctrlr %p resetting NSID %u\n", ctrlr, ns->opts.nsid);
1073 		spdk_bdev_reset(ns->desc, ns_info->channel, nvmf_bdev_complete_reset, NULL);
1074 	}
1075 
1076 	return SPDK_POLLER_BUSY;
1077 }
1078 
1079 const struct spdk_nvmf_registers *
1080 spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr)
1081 {
1082 	return &ctrlr->vcprop;
1083 }
1084 
1085 void
1086 nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr)
1087 {
1088 	ctrlr->vcprop.csts.bits.cfs = 1;
1089 }
1090 
1091 static uint64_t
1092 nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr)
1093 {
1094 	return ctrlr->vcprop.cap.raw;
1095 }
1096 
1097 static uint64_t
1098 nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr)
1099 {
1100 	return ctrlr->vcprop.vs.raw;
1101 }
1102 
1103 static uint64_t
1104 nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr)
1105 {
1106 	return ctrlr->vcprop.cc.raw;
1107 }
1108 
1109 static bool
1110 nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1111 {
1112 	union spdk_nvme_cc_register cc, diff;
1113 	uint32_t cc_timeout_ms;
1114 
1115 	cc.raw = value;
1116 
1117 	SPDK_DEBUGLOG(nvmf, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw);
1118 	SPDK_DEBUGLOG(nvmf, "new CC: 0x%08x\n", cc.raw);
1119 
1120 	/*
1121 	 * Calculate which bits changed between the current and new CC.
1122 	 * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed.
1123 	 */
1124 	diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw;
1125 
1126 	if (diff.bits.en) {
1127 		if (cc.bits.en) {
1128 			SPDK_DEBUGLOG(nvmf, "Property Set CC Enable!\n");
1129 			nvmf_ctrlr_stop_association_timer(ctrlr);
1130 
1131 			ctrlr->vcprop.cc.bits.en = 1;
1132 			ctrlr->vcprop.csts.bits.rdy = 1;
1133 		} else {
1134 			SPDK_DEBUGLOG(nvmf, "Property Set CC Disable!\n");
1135 			if (ctrlr->disconnect_in_progress) {
1136 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1137 				return true;
1138 			}
1139 
1140 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1141 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1142 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1143 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1144 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1145 
1146 			ctrlr->vcprop.cc.bits.en = 0;
1147 			ctrlr->disconnect_in_progress = true;
1148 			ctrlr->disconnect_is_shn = false;
1149 			spdk_for_each_channel(ctrlr->subsys->tgt,
1150 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1151 					      ctrlr,
1152 					      nvmf_ctrlr_cc_reset_shn_done);
1153 		}
1154 		diff.bits.en = 0;
1155 	}
1156 
1157 	if (diff.bits.shn) {
1158 		if (cc.bits.shn == SPDK_NVME_SHN_NORMAL ||
1159 		    cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
1160 			SPDK_DEBUGLOG(nvmf, "Property Set CC Shutdown %u%ub!\n",
1161 				      cc.bits.shn >> 1, cc.bits.shn & 1);
1162 			if (ctrlr->disconnect_in_progress) {
1163 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1164 				return true;
1165 			}
1166 
1167 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1168 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1169 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1170 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1171 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1172 
1173 			ctrlr->vcprop.cc.bits.shn = cc.bits.shn;
1174 			ctrlr->disconnect_in_progress = true;
1175 			ctrlr->disconnect_is_shn = true;
1176 			spdk_for_each_channel(ctrlr->subsys->tgt,
1177 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1178 					      ctrlr,
1179 					      nvmf_ctrlr_cc_reset_shn_done);
1180 
1181 			/* From the time a shutdown is initiated the controller shall disable
1182 			 * Keep Alive timer */
1183 			nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
1184 		} else if (cc.bits.shn == 0) {
1185 			ctrlr->vcprop.cc.bits.shn = 0;
1186 		} else {
1187 			SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n",
1188 				    cc.bits.shn >> 1, cc.bits.shn & 1);
1189 			return false;
1190 		}
1191 		diff.bits.shn = 0;
1192 	}
1193 
1194 	if (diff.bits.iosqes) {
1195 		SPDK_DEBUGLOG(nvmf, "Prop Set IOSQES = %u (%u bytes)\n",
1196 			      cc.bits.iosqes, 1u << cc.bits.iosqes);
1197 		ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes;
1198 		diff.bits.iosqes = 0;
1199 	}
1200 
1201 	if (diff.bits.iocqes) {
1202 		SPDK_DEBUGLOG(nvmf, "Prop Set IOCQES = %u (%u bytes)\n",
1203 			      cc.bits.iocqes, 1u << cc.bits.iocqes);
1204 		ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes;
1205 		diff.bits.iocqes = 0;
1206 	}
1207 
1208 	if (diff.bits.ams) {
1209 		SPDK_ERRLOG("Arbitration Mechanism Selected (AMS) 0x%x not supported!\n", cc.bits.ams);
1210 		return false;
1211 	}
1212 
1213 	if (diff.bits.mps) {
1214 		SPDK_ERRLOG("Memory Page Size (MPS) %u KiB not supported!\n", (1 << (2 + cc.bits.mps)));
1215 		return false;
1216 	}
1217 
1218 	if (diff.bits.css) {
1219 		if (cc.bits.css > SPDK_NVME_CC_CSS_IOCS) {
1220 			SPDK_ERRLOG("I/O Command Set Selected (CSS) 0x%x not supported!\n", cc.bits.css);
1221 			return false;
1222 		}
1223 		diff.bits.css = 0;
1224 	}
1225 
1226 	if (diff.raw != 0) {
1227 		/* Print an error message, but don't fail the command in this case.
1228 		 * If we did want to fail in this case, we'd need to ensure we acted
1229 		 * on no other bits or the initiator gets confused. */
1230 		SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw);
1231 	}
1232 
1233 	return true;
1234 }
1235 
1236 static uint64_t
1237 nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr)
1238 {
1239 	return ctrlr->vcprop.csts.raw;
1240 }
1241 
1242 static uint64_t
1243 nvmf_prop_get_aqa(struct spdk_nvmf_ctrlr *ctrlr)
1244 {
1245 	return ctrlr->vcprop.aqa.raw;
1246 }
1247 
1248 static bool
1249 nvmf_prop_set_aqa(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1250 {
1251 	union spdk_nvme_aqa_register aqa;
1252 
1253 	aqa.raw = value;
1254 
1255 	/*
1256 	 * We don't need to explicitly check for maximum size, as the fields are
1257 	 * limited to 12 bits (4096).
1258 	 */
1259 	if (aqa.bits.asqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1260 	    aqa.bits.acqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1261 	    aqa.bits.reserved1 != 0 || aqa.bits.reserved2 != 0) {
1262 		return false;
1263 	}
1264 
1265 	ctrlr->vcprop.aqa.raw = value;
1266 
1267 	return true;
1268 }
1269 
1270 static uint64_t
1271 nvmf_prop_get_asq(struct spdk_nvmf_ctrlr *ctrlr)
1272 {
1273 	return ctrlr->vcprop.asq;
1274 }
1275 
1276 static bool
1277 nvmf_prop_set_asq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1278 {
1279 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & (0xFFFFFFFFULL << 32ULL)) | value;
1280 
1281 	return true;
1282 }
1283 
1284 static bool
1285 nvmf_prop_set_asq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1286 {
1287 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1288 
1289 	return true;
1290 }
1291 
1292 static uint64_t
1293 nvmf_prop_get_acq(struct spdk_nvmf_ctrlr *ctrlr)
1294 {
1295 	return ctrlr->vcprop.acq;
1296 }
1297 
1298 static bool
1299 nvmf_prop_set_acq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1300 {
1301 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & (0xFFFFFFFFULL << 32ULL)) | value;
1302 
1303 	return true;
1304 }
1305 
1306 static bool
1307 nvmf_prop_set_acq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1308 {
1309 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1310 
1311 	return true;
1312 }
1313 
1314 struct nvmf_prop {
1315 	uint32_t ofst;
1316 	uint8_t size;
1317 	char name[11];
1318 	uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr);
1319 	bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
1320 	bool (*set_upper_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
1321 };
1322 
1323 #define PROP(field, size, get_cb, set_cb, set_upper_cb) \
1324 	{ \
1325 		offsetof(struct spdk_nvme_registers, field), \
1326 		size, \
1327 		#field, \
1328 		get_cb, set_cb, set_upper_cb \
1329 	}
1330 
1331 static const struct nvmf_prop nvmf_props[] = {
1332 	PROP(cap,  8, nvmf_prop_get_cap,  NULL,                    NULL),
1333 	PROP(vs,   4, nvmf_prop_get_vs,   NULL,                    NULL),
1334 	PROP(cc,   4, nvmf_prop_get_cc,   nvmf_prop_set_cc,        NULL),
1335 	PROP(csts, 4, nvmf_prop_get_csts, NULL,                    NULL),
1336 	PROP(aqa,  4, nvmf_prop_get_aqa,  nvmf_prop_set_aqa,       NULL),
1337 	PROP(asq,  8, nvmf_prop_get_asq,  nvmf_prop_set_asq_lower, nvmf_prop_set_asq_upper),
1338 	PROP(acq,  8, nvmf_prop_get_acq,  nvmf_prop_set_acq_lower, nvmf_prop_set_acq_upper),
1339 };
1340 
1341 static const struct nvmf_prop *
1342 find_prop(uint32_t ofst, uint8_t size)
1343 {
1344 	size_t i;
1345 
1346 	for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) {
1347 		const struct nvmf_prop *prop = &nvmf_props[i];
1348 
1349 		if ((ofst >= prop->ofst) && (ofst + size <= prop->ofst + prop->size)) {
1350 			return prop;
1351 		}
1352 	}
1353 
1354 	return NULL;
1355 }
1356 
1357 static int
1358 nvmf_property_get(struct spdk_nvmf_request *req)
1359 {
1360 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1361 	struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd;
1362 	struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp;
1363 	const struct nvmf_prop *prop;
1364 	uint8_t size;
1365 
1366 	response->status.sc = 0;
1367 	response->value.u64 = 0;
1368 
1369 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x\n",
1370 		      cmd->attrib.size, cmd->ofst);
1371 
1372 	switch (cmd->attrib.size) {
1373 	case SPDK_NVMF_PROP_SIZE_4:
1374 		size = 4;
1375 		break;
1376 	case SPDK_NVMF_PROP_SIZE_8:
1377 		size = 8;
1378 		break;
1379 	default:
1380 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1381 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1382 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1383 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1384 	}
1385 
1386 	prop = find_prop(cmd->ofst, size);
1387 	if (prop == NULL || prop->get_cb == NULL) {
1388 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1389 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1390 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1391 	}
1392 
1393 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1394 
1395 	response->value.u64 = prop->get_cb(ctrlr);
1396 
1397 	if (size != prop->size) {
1398 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to read. */
1399 		assert(size == 4);
1400 		assert(prop->size == 8);
1401 
1402 		if (cmd->ofst == prop->ofst) {
1403 			/* Keep bottom 4 bytes only */
1404 			response->value.u64 &= 0xFFFFFFFF;
1405 		} else {
1406 			/* Keep top 4 bytes only */
1407 			response->value.u64 >>= 32;
1408 		}
1409 	}
1410 
1411 	SPDK_DEBUGLOG(nvmf, "response value: 0x%" PRIx64 "\n", response->value.u64);
1412 
1413 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1414 }
1415 
1416 static int
1417 nvmf_property_set(struct spdk_nvmf_request *req)
1418 {
1419 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1420 	struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
1421 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1422 	const struct nvmf_prop *prop;
1423 	uint64_t value;
1424 	uint8_t size;
1425 	bool ret;
1426 
1427 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
1428 		      cmd->attrib.size, cmd->ofst, cmd->value.u64);
1429 
1430 	switch (cmd->attrib.size) {
1431 	case SPDK_NVMF_PROP_SIZE_4:
1432 		size = 4;
1433 		break;
1434 	case SPDK_NVMF_PROP_SIZE_8:
1435 		size = 8;
1436 		break;
1437 	default:
1438 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1439 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1440 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1441 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1442 	}
1443 
1444 	prop = find_prop(cmd->ofst, size);
1445 	if (prop == NULL || prop->set_cb == NULL) {
1446 		SPDK_INFOLOG(nvmf, "Invalid offset 0x%x\n", cmd->ofst);
1447 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1448 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1449 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1450 	}
1451 
1452 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1453 
1454 	value = cmd->value.u64;
1455 
1456 	if (prop->size == 4) {
1457 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1458 	} else if (size != prop->size) {
1459 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. */
1460 		assert(size == 4);
1461 		assert(prop->size == 8);
1462 
1463 		if (cmd->ofst == prop->ofst) {
1464 			ret = prop->set_cb(ctrlr, (uint32_t)value);
1465 		} else {
1466 			ret = prop->set_upper_cb(ctrlr, (uint32_t)value);
1467 		}
1468 	} else {
1469 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1470 		if (ret) {
1471 			ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32));
1472 		}
1473 	}
1474 
1475 	if (!ret) {
1476 		SPDK_ERRLOG("prop set_cb failed\n");
1477 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1478 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1479 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1480 	}
1481 
1482 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1483 }
1484 
1485 static int
1486 nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req)
1487 {
1488 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1489 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1490 
1491 	SPDK_DEBUGLOG(nvmf, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11);
1492 
1493 	ctrlr->feat.arbitration.raw = cmd->cdw11;
1494 	ctrlr->feat.arbitration.bits.reserved = 0;
1495 
1496 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1497 }
1498 
1499 static int
1500 nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req)
1501 {
1502 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1503 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1504 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1505 
1506 	SPDK_DEBUGLOG(nvmf, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11);
1507 
1508 	/* Only PS = 0 is allowed, since we report NPSS = 0 */
1509 	if (cmd->cdw11_bits.feat_power_management.bits.ps != 0) {
1510 		SPDK_ERRLOG("Invalid power state %u\n", cmd->cdw11_bits.feat_power_management.bits.ps);
1511 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1512 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1513 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1514 	}
1515 
1516 	ctrlr->feat.power_management.raw = cmd->cdw11;
1517 	ctrlr->feat.power_management.bits.reserved = 0;
1518 
1519 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1520 }
1521 
1522 static bool
1523 temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts)
1524 {
1525 	/*
1526 	 * Valid TMPSEL values:
1527 	 *  0000b - 1000b: temperature sensors
1528 	 *  1111b: set all implemented temperature sensors
1529 	 */
1530 	if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) {
1531 		/* 1001b - 1110b: reserved */
1532 		SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel);
1533 		return false;
1534 	}
1535 
1536 	/*
1537 	 * Valid THSEL values:
1538 	 *  00b: over temperature threshold
1539 	 *  01b: under temperature threshold
1540 	 */
1541 	if (opts->bits.thsel > 1) {
1542 		/* 10b - 11b: reserved */
1543 		SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel);
1544 		return false;
1545 	}
1546 
1547 	return true;
1548 }
1549 
1550 static int
1551 nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req)
1552 {
1553 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1554 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1555 
1556 	SPDK_DEBUGLOG(nvmf, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1557 
1558 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1559 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1560 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1561 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1562 	}
1563 
1564 	/* TODO: no sensors implemented - ignore new values */
1565 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1566 }
1567 
1568 static int
1569 nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req)
1570 {
1571 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1572 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1573 
1574 	SPDK_DEBUGLOG(nvmf, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1575 
1576 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1577 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1578 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1579 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1580 	}
1581 
1582 	/* TODO: no sensors implemented - return 0 for all thresholds */
1583 	rsp->cdw0 = 0;
1584 
1585 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1586 }
1587 
1588 static int
1589 nvmf_ctrlr_get_features_interrupt_vector_configuration(struct spdk_nvmf_request *req)
1590 {
1591 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1592 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1593 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1594 	union spdk_nvme_feat_interrupt_vector_configuration iv_conf = {};
1595 
1596 	SPDK_DEBUGLOG(nvmf, "Get Features - Interrupt Vector Configuration (cdw11 = 0x%0x)\n", cmd->cdw11);
1597 
1598 	iv_conf.bits.iv = cmd->cdw11_bits.feat_interrupt_vector_configuration.bits.iv;
1599 	iv_conf.bits.cd = ctrlr->feat.interrupt_vector_configuration.bits.cd;
1600 	rsp->cdw0 = iv_conf.raw;
1601 
1602 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1603 }
1604 
1605 static int
1606 nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req)
1607 {
1608 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1609 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1610 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1611 
1612 	SPDK_DEBUGLOG(nvmf, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11);
1613 
1614 	if (cmd->cdw11_bits.feat_error_recovery.bits.dulbe) {
1615 		/*
1616 		 * Host is not allowed to set this bit, since we don't advertise it in
1617 		 * Identify Namespace.
1618 		 */
1619 		SPDK_ERRLOG("Host set unsupported DULBE bit\n");
1620 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1621 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1622 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1623 	}
1624 
1625 	ctrlr->feat.error_recovery.raw = cmd->cdw11;
1626 	ctrlr->feat.error_recovery.bits.reserved = 0;
1627 
1628 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1629 }
1630 
1631 static int
1632 nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req)
1633 {
1634 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1635 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1636 
1637 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11);
1638 
1639 	ctrlr->feat.volatile_write_cache.raw = cmd->cdw11;
1640 	ctrlr->feat.volatile_write_cache.bits.reserved = 0;
1641 
1642 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache %s\n",
1643 		      ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled");
1644 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1645 }
1646 
1647 static int
1648 nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req)
1649 {
1650 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1651 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1652 
1653 	SPDK_DEBUGLOG(nvmf, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11);
1654 
1655 	ctrlr->feat.write_atomicity.raw = cmd->cdw11;
1656 	ctrlr->feat.write_atomicity.bits.reserved = 0;
1657 
1658 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1659 }
1660 
1661 static int
1662 nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req)
1663 {
1664 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1665 
1666 	SPDK_ERRLOG("Set Features - Host Identifier not allowed\n");
1667 	response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1668 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1669 }
1670 
1671 static int
1672 nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req)
1673 {
1674 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1675 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1676 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1677 	struct spdk_iov_xfer ix;
1678 
1679 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Identifier\n");
1680 
1681 	if (!cmd->cdw11_bits.feat_host_identifier.bits.exhid) {
1682 		/* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */
1683 		SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n");
1684 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1685 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1686 	}
1687 
1688 	if (req->iovcnt < 1 || req->length < sizeof(ctrlr->hostid)) {
1689 		SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n");
1690 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1691 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1692 	}
1693 
1694 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
1695 	spdk_iov_xfer_from_buf(&ix, &ctrlr->hostid, sizeof(ctrlr->hostid));
1696 
1697 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1698 }
1699 
1700 static int
1701 nvmf_ctrlr_get_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1702 {
1703 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1704 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1705 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1706 	struct spdk_nvmf_ns *ns;
1707 
1708 	SPDK_DEBUGLOG(nvmf, "get Features - Reservation Notification Mask\n");
1709 
1710 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1711 		SPDK_ERRLOG("get Features - Invalid Namespace ID\n");
1712 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1713 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1714 	}
1715 
1716 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1717 	if (ns == NULL) {
1718 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1719 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1720 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1721 	}
1722 	rsp->cdw0 = ns->mask;
1723 
1724 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1725 }
1726 
1727 static int
1728 nvmf_ctrlr_set_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1729 {
1730 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1731 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
1732 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1733 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1734 	struct spdk_nvmf_ns *ns;
1735 
1736 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Notification Mask\n");
1737 
1738 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1739 		for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
1740 		     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
1741 			ns->mask = cmd->cdw11;
1742 		}
1743 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1744 	}
1745 
1746 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1747 	if (ns == NULL) {
1748 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1749 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1750 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1751 	}
1752 	ns->mask = cmd->cdw11;
1753 
1754 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1755 }
1756 
1757 static int
1758 nvmf_ctrlr_get_features_reservation_persistence(struct spdk_nvmf_request *req)
1759 {
1760 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1761 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1762 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1763 	struct spdk_nvmf_ns *ns;
1764 
1765 	SPDK_DEBUGLOG(nvmf, "Get Features - Reservation Persistence\n");
1766 
1767 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1768 	/* NSID with SPDK_NVME_GLOBAL_NS_TAG (=0xffffffff) also included */
1769 	if (ns == NULL) {
1770 		SPDK_ERRLOG("Get Features - Invalid Namespace ID\n");
1771 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1772 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1773 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1774 	}
1775 
1776 	response->cdw0 = ns->ptpl_activated;
1777 
1778 	response->status.sct = SPDK_NVME_SCT_GENERIC;
1779 	response->status.sc = SPDK_NVME_SC_SUCCESS;
1780 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1781 }
1782 
1783 static int
1784 nvmf_ctrlr_set_features_reservation_persistence(struct spdk_nvmf_request *req)
1785 {
1786 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1787 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1788 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1789 	struct spdk_nvmf_ns *ns;
1790 	bool ptpl;
1791 
1792 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Persistence\n");
1793 
1794 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1795 	ptpl = cmd->cdw11_bits.feat_rsv_persistence.bits.ptpl;
1796 
1797 	if (cmd->nsid != SPDK_NVME_GLOBAL_NS_TAG && ns && ns->ptpl_file) {
1798 		ns->ptpl_activated = ptpl;
1799 	} else if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1800 		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns && ns->ptpl_file;
1801 		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1802 			ns->ptpl_activated = ptpl;
1803 		}
1804 	} else {
1805 		SPDK_ERRLOG("Set Features - Invalid Namespace ID or Reservation Configuration\n");
1806 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1807 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1808 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1809 	}
1810 
1811 	/* TODO: Feature not changeable for now */
1812 	response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1813 	response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
1814 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1815 }
1816 
1817 static int
1818 nvmf_ctrlr_get_features_host_behavior_support(struct spdk_nvmf_request *req)
1819 {
1820 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1821 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1822 	struct spdk_nvme_host_behavior host_behavior = {};
1823 	struct spdk_iov_xfer ix;
1824 
1825 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Behavior Support\n");
1826 
1827 	if (req->iovcnt < 1 || req->length < sizeof(struct spdk_nvme_host_behavior)) {
1828 		SPDK_ERRLOG("invalid data buffer for Host Behavior Support\n");
1829 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1830 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1831 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1832 	}
1833 
1834 	host_behavior.acre = ctrlr->acre_enabled;
1835 
1836 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
1837 	spdk_iov_xfer_from_buf(&ix, &host_behavior, sizeof(host_behavior));
1838 
1839 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1840 }
1841 
1842 static int
1843 nvmf_ctrlr_set_features_host_behavior_support(struct spdk_nvmf_request *req)
1844 {
1845 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1846 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1847 	struct spdk_nvme_host_behavior *host_behavior;
1848 
1849 	SPDK_DEBUGLOG(nvmf, "Set Features - Host Behavior Support\n");
1850 	if (req->iovcnt != 1) {
1851 		SPDK_ERRLOG("Host Behavior Support invalid iovcnt: %d\n", req->iovcnt);
1852 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1853 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1854 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1855 	}
1856 	if (req->iov[0].iov_len != sizeof(struct spdk_nvme_host_behavior)) {
1857 		SPDK_ERRLOG("Host Behavior Support invalid iov_len: %zd\n", req->iov[0].iov_len);
1858 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1859 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1860 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1861 	}
1862 
1863 	host_behavior = (struct spdk_nvme_host_behavior *)req->iov[0].iov_base;
1864 	if (host_behavior->acre == 0) {
1865 		ctrlr->acre_enabled = false;
1866 	} else if (host_behavior->acre == 1) {
1867 		ctrlr->acre_enabled = true;
1868 	} else {
1869 		SPDK_ERRLOG("Host Behavior Support invalid acre: 0x%02x\n", host_behavior->acre);
1870 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1871 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1872 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1873 	}
1874 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1875 }
1876 
1877 static int
1878 nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
1879 {
1880 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1881 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1882 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1883 
1884 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);
1885 
1886 	/*
1887 	 * if attempts to disable keep alive by setting kato to 0h
1888 	 * a status value of keep alive invalid shall be returned
1889 	 */
1890 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato == 0) {
1891 		rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
1892 	} else if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
1893 		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
1894 	} else {
1895 		/* round up to milliseconds */
1896 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(
1897 					cmd->cdw11_bits.feat_keep_alive_timer.bits.kato,
1898 					KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
1899 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
1900 	}
1901 
1902 	/*
1903 	 * if change the keep alive timeout value successfully
1904 	 * update the keep alive poller.
1905 	 */
1906 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato != 0) {
1907 		if (ctrlr->keep_alive_poller != NULL) {
1908 			spdk_poller_unregister(&ctrlr->keep_alive_poller);
1909 		}
1910 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
1911 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
1912 	}
1913 
1914 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer set to %u ms\n",
1915 		      ctrlr->feat.keep_alive_timer.bits.kato);
1916 
1917 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1918 }
1919 
1920 static int
1921 nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
1922 {
1923 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1924 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1925 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1926 	uint32_t count;
1927 
1928 	SPDK_DEBUGLOG(nvmf, "Set Features - Number of Queues, cdw11 0x%x\n",
1929 		      req->cmd->nvme_cmd.cdw11);
1930 
1931 	if (cmd->cdw11_bits.feat_num_of_queues.bits.ncqr == UINT16_MAX ||
1932 	    cmd->cdw11_bits.feat_num_of_queues.bits.nsqr == UINT16_MAX) {
1933 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1934 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1935 	}
1936 
1937 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
1938 	/* verify that the controller is ready to process commands */
1939 	if (count > 1) {
1940 		SPDK_DEBUGLOG(nvmf, "Queue pairs already active!\n");
1941 		rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1942 	} else {
1943 		/*
1944 		 * Ignore the value requested by the host -
1945 		 * always return the pre-configured value based on max_qpairs_allowed.
1946 		 */
1947 		rsp->cdw0 = ctrlr->feat.number_of_queues.raw;
1948 	}
1949 
1950 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1951 }
1952 
/*
 * Compile-time guard on the size of struct spdk_nvmf_ctrlr. A change in the
 * structure's size breaks the build here, prompting a review of whether the
 * migration data needs a corresponding field before the constant is updated.
 */
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr) == 4920,
		   "Please check migration fields that need to be added or not");
1955 
1956 static void
1957 nvmf_ctrlr_migr_data_copy(struct spdk_nvmf_ctrlr_migr_data *data,
1958 			  const struct spdk_nvmf_ctrlr_migr_data *data_src, size_t data_size)
1959 {
1960 	assert(data);
1961 	assert(data_src);
1962 	assert(data_size);
1963 
1964 	memcpy(&data->regs, &data_src->regs, spdk_min(data->regs_size, data_src->regs_size));
1965 	memcpy(&data->feat, &data_src->feat, spdk_min(data->feat_size, data_src->feat_size));
1966 
1967 #define SET_FIELD(field) \
1968     if (offsetof(struct spdk_nvmf_ctrlr_migr_data, field) + sizeof(data->field) <= data_size) { \
1969         data->field = data_src->field; \
1970     } \
1971 
1972 	SET_FIELD(cntlid);
1973 	SET_FIELD(acre);
1974 	SET_FIELD(num_aer_cids);
1975 	SET_FIELD(num_async_events);
1976 	SET_FIELD(notice_aen_mask);
1977 #undef SET_FIELD
1978 
1979 #define SET_ARRAY(arr) \
1980     if (offsetof(struct spdk_nvmf_ctrlr_migr_data, arr) + sizeof(data->arr) <= data_size) { \
1981         memcpy(&data->arr, &data_src->arr, sizeof(data->arr)); \
1982     } \
1983 
1984 	SET_ARRAY(async_events);
1985 	SET_ARRAY(aer_cids);
1986 #undef SET_ARRAY
1987 }
1988 
1989 int
1990 spdk_nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
1991 			       struct spdk_nvmf_ctrlr_migr_data *data)
1992 {
1993 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
1994 	uint32_t i;
1995 	struct spdk_nvmf_ctrlr_migr_data data_local = {
1996 		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
1997 		.regs_size = sizeof(struct spdk_nvmf_registers),
1998 		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
1999 	};
2000 
2001 	assert(data->data_size <= sizeof(data_local));
2002 	assert(spdk_get_thread() == ctrlr->thread);
2003 
2004 	memcpy(&data_local.regs, &ctrlr->vcprop, sizeof(struct spdk_nvmf_registers));
2005 	memcpy(&data_local.feat, &ctrlr->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
2006 
2007 	data_local.cntlid = ctrlr->cntlid;
2008 	data_local.acre = ctrlr->acre_enabled;
2009 	data_local.num_aer_cids = ctrlr->nr_aer_reqs;
2010 
2011 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
2012 		if (data_local.num_async_events + 1 > SPDK_NVMF_MIGR_MAX_PENDING_AERS) {
2013 			SPDK_ERRLOG("ctrlr %p has too many pending AERs\n", ctrlr);
2014 			break;
2015 		}
2016 
2017 		data_local.async_events[data_local.num_async_events++].raw = event->event.raw;
2018 	}
2019 
2020 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
2021 		struct spdk_nvmf_request *req = ctrlr->aer_req[i];
2022 		data_local.aer_cids[i] = req->cmd->nvme_cmd.cid;
2023 	}
2024 	data_local.notice_aen_mask = ctrlr->notice_aen_mask;
2025 
2026 	nvmf_ctrlr_migr_data_copy(data, &data_local, spdk_min(data->data_size, data_local.data_size));
2027 	return 0;
2028 }
2029 
2030 int
2031 spdk_nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
2032 				  const struct spdk_nvmf_ctrlr_migr_data *data)
2033 {
2034 	uint32_t i;
2035 	struct spdk_nvmf_ctrlr_migr_data data_local = {
2036 		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
2037 		.regs_size = sizeof(struct spdk_nvmf_registers),
2038 		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
2039 	};
2040 
2041 	assert(data->data_size <= sizeof(data_local));
2042 	assert(spdk_get_thread() == ctrlr->thread);
2043 
2044 	/* local version of data should have defaults set before copy */
2045 	nvmf_ctrlr_migr_data_copy(&data_local, data, spdk_min(data->data_size, data_local.data_size));
2046 	memcpy(&ctrlr->vcprop, &data_local.regs, sizeof(struct spdk_nvmf_registers));
2047 	memcpy(&ctrlr->feat, &data_local.feat, sizeof(struct spdk_nvmf_ctrlr_feat));
2048 
2049 	ctrlr->cntlid = data_local.cntlid;
2050 	ctrlr->acre_enabled = data_local.acre;
2051 
2052 	for (i = 0; i < data_local.num_async_events; i++) {
2053 		struct spdk_nvmf_async_event_completion *event;
2054 
2055 		event = calloc(1, sizeof(*event));
2056 		if (!event) {
2057 			return -ENOMEM;
2058 		}
2059 
2060 		event->event.raw = data_local.async_events[i].raw;
2061 		STAILQ_INSERT_TAIL(&ctrlr->async_events, event, link);
2062 	}
2063 	ctrlr->notice_aen_mask = data_local.notice_aen_mask;
2064 
2065 	return 0;
2066 }
2067 
2068 static int
2069 nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req)
2070 {
2071 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2072 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2073 
2074 	SPDK_DEBUGLOG(nvmf, "Set Features - Async Event Configuration, cdw11 0x%08x\n",
2075 		      cmd->cdw11);
2076 	ctrlr->feat.async_event_configuration.raw = cmd->cdw11;
2077 	ctrlr->feat.async_event_configuration.bits.reserved1 = 0;
2078 	ctrlr->feat.async_event_configuration.bits.reserved2 = 0;
2079 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2080 }
2081 
2082 static int
2083 nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
2084 {
2085 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2086 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
2087 	struct spdk_nvmf_async_event_completion *pending_event;
2088 
2089 	SPDK_DEBUGLOG(nvmf, "Async Event Request\n");
2090 
2091 	/* Four asynchronous events are supported for now */
2092 	if (ctrlr->nr_aer_reqs >= SPDK_NVMF_MAX_ASYNC_EVENTS) {
2093 		SPDK_DEBUGLOG(nvmf, "AERL exceeded\n");
2094 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
2095 		rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
2096 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2097 	}
2098 
2099 	if (!STAILQ_EMPTY(&ctrlr->async_events)) {
2100 		pending_event = STAILQ_FIRST(&ctrlr->async_events);
2101 		rsp->cdw0 = pending_event->event.raw;
2102 		STAILQ_REMOVE(&ctrlr->async_events, pending_event, spdk_nvmf_async_event_completion, link);
2103 		free(pending_event);
2104 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2105 	}
2106 
2107 	ctrlr->aer_req[ctrlr->nr_aer_reqs++] = req;
2108 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
2109 }
2110 
2111 static void
2112 nvmf_get_firmware_slot_log_page(struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length)
2113 {
2114 	struct spdk_nvme_firmware_page fw_page;
2115 	size_t copy_len;
2116 	struct spdk_iov_xfer ix;
2117 
2118 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2119 
2120 	memset(&fw_page, 0, sizeof(fw_page));
2121 	fw_page.afi.active_slot = 1;
2122 	fw_page.afi.next_reset_slot = 0;
2123 	spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' ');
2124 
2125 	if (offset < sizeof(fw_page)) {
2126 		copy_len = spdk_min(sizeof(fw_page) - offset, length);
2127 		if (copy_len > 0) {
2128 			spdk_iov_xfer_from_buf(&ix, (const char *)&fw_page + offset, copy_len);
2129 		}
2130 	}
2131 }
2132 
/*
 * Asynchronous Event Mask Bit
 *
 * Bit positions within the controller's notice_aen_mask; see
 * nvmf_ctrlr_mask_aen() and nvmf_ctrlr_unmask_aen() for how they are used.
 */
enum spdk_nvme_async_event_mask_bit {
	/* Mask Namespace Change Notification */
	SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT		= 0,
	/* Mask Asymmetric Namespace Access Change Notification */
	SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT		= 1,
	/* Mask Discovery Log Change Notification */
	SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT	= 2,
	/* Mask Reservation Log Page Available Notification */
	SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT	= 3,
	/* Mask Error Event */
	SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT			= 4,
	/* 5 - 63 Reserved */
};
2149 
2150 static inline void
2151 nvmf_ctrlr_unmask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2152 		      enum spdk_nvme_async_event_mask_bit mask)
2153 {
2154 	ctrlr->notice_aen_mask &= ~(1 << mask);
2155 }
2156 
2157 static inline bool
2158 nvmf_ctrlr_mask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2159 		    enum spdk_nvme_async_event_mask_bit mask)
2160 {
2161 	if (ctrlr->notice_aen_mask & (1 << mask)) {
2162 		return false;
2163 	} else {
2164 		ctrlr->notice_aen_mask |= (1 << mask);
2165 		return true;
2166 	}
2167 }
2168 
/*
 * Alias for enum spdk_nvme_ana_state. The typedef has to be used as the
 * return type in the function declarations below to appease astyle.
 */
typedef enum spdk_nvme_ana_state spdk_nvme_ana_state_t;
2171 
2172 static inline spdk_nvme_ana_state_t
2173 nvmf_ctrlr_get_ana_state(struct spdk_nvmf_ctrlr *ctrlr, uint32_t anagrpid)
2174 {
2175 	if (!ctrlr->subsys->flags.ana_reporting) {
2176 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2177 	}
2178 
2179 	if (spdk_unlikely(ctrlr->listener == NULL)) {
2180 		return SPDK_NVME_ANA_INACCESSIBLE_STATE;
2181 	}
2182 
2183 	assert(anagrpid - 1 < ctrlr->subsys->max_nsid);
2184 	return ctrlr->listener->ana_state[anagrpid - 1];
2185 }
2186 
2187 static spdk_nvme_ana_state_t
2188 nvmf_ctrlr_get_ana_state_from_nsid(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2189 {
2190 	struct spdk_nvmf_ns *ns;
2191 
2192 	/* We do not have NVM subsystem specific ANA state. Hence if NSID is either
2193 	 * SPDK_NVMF_GLOBAL_NS_TAG, invalid, or for inactive namespace, return
2194 	 * the optimized state.
2195 	 */
2196 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
2197 	if (ns == NULL) {
2198 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2199 	}
2200 
2201 	return nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2202 }
2203 
2204 static void
2205 nvmf_get_error_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2206 			uint64_t offset, uint32_t length, uint32_t rae)
2207 {
2208 	if (!rae) {
2209 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT);
2210 	}
2211 
2212 	/* TODO: actually fill out log page data */
2213 }
2214 
/*
 * Fill the host buffer with the Asymmetric Namespace Access log page.
 *
 * The page is generated on the fly as a byte stream: the header, then for each
 * ANA group with a non-zero entry in the subsystem's ana_group array a group
 * descriptor followed by the NSIDs of the namespaces in that group. Only the
 * window selected by offset and length is copied into the iovecs.
 *
 * Throughout the function, offset holds the number of stream bytes that still
 * have to be skipped before copying starts, and length holds the number of
 * bytes that may still be copied. Once copying has started offset stays 0.
 *
 * On every exit path the ANA change AEN mask bit is cleared unless rae is set.
 */
static void
nvmf_get_ana_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
		      uint64_t offset, uint32_t length, uint32_t rae)
{
	struct spdk_nvme_ana_page ana_hdr;
	struct spdk_nvme_ana_group_descriptor ana_desc;
	size_t copy_len, copied_len;
	uint32_t num_anagrp = 0, anagrpid;
	struct spdk_nvmf_ns *ns;
	struct spdk_iov_xfer ix;

	spdk_iov_xfer_init(&ix, iovs, iovcnt);

	if (length == 0) {
		goto done;
	}

	if (offset >= sizeof(ana_hdr)) {
		/* The requested window starts after the header: skip it entirely. */
		offset -= sizeof(ana_hdr);
	} else {
		/* Count the ANA groups that have at least one namespace. */
		for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
			if (ctrlr->subsys->ana_group[anagrpid - 1] > 0) {
				num_anagrp++;
			}
		}

		memset(&ana_hdr, 0, sizeof(ana_hdr));

		ana_hdr.num_ana_group_desc = num_anagrp;
		/* TODO: Support Change Count. */
		ana_hdr.change_count = 0;

		/* Copy the (possibly partial) header starting at the requested offset. */
		copy_len = spdk_min(sizeof(ana_hdr) - offset, length);
		copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ana_hdr + offset, copy_len);
		assert(copied_len == copy_len);
		length -= copied_len;
		offset = 0;
	}

	if (length == 0) {
		goto done;
	}

	for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
		/* Groups without namespaces do not get a descriptor. */
		if (ctrlr->subsys->ana_group[anagrpid - 1] == 0) {
			continue;
		}

		if (offset >= sizeof(ana_desc)) {
			/* Window starts after this descriptor: skip it. */
			offset -= sizeof(ana_desc);
		} else {
			memset(&ana_desc, 0, sizeof(ana_desc));

			ana_desc.ana_group_id = anagrpid;
			ana_desc.num_of_nsid = ctrlr->subsys->ana_group[anagrpid - 1];
			ana_desc.ana_state = nvmf_ctrlr_get_ana_state(ctrlr, anagrpid);

			copy_len = spdk_min(sizeof(ana_desc) - offset, length);
			copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ana_desc + offset,
							    copy_len);
			assert(copied_len == copy_len);
			length -= copied_len;
			offset = 0;

			if (length == 0) {
				goto done;
			}
		}

		/* TODO: Revisit here about O(n^2) cost if we have subsystem with
		 * many namespaces in the future.
		 */
		/* Emit the NSID of every namespace that belongs to this group. */
		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
			if (ns->anagrpid != anagrpid) {
				continue;
			}

			if (offset >= sizeof(uint32_t)) {
				/* Window starts after this NSID: skip it. */
				offset -= sizeof(uint32_t);
				continue;
			}

			copy_len = spdk_min(sizeof(uint32_t) - offset, length);
			copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ns->nsid + offset,
							    copy_len);
			assert(copied_len == copy_len);
			length -= copied_len;
			offset = 0;

			if (length == 0) {
				goto done;
			}
		}
	}

done:
	if (!rae) {
		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT);
	}
}
2316 
2317 void
2318 nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2319 {
2320 	uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list);
2321 	uint16_t i;
2322 	bool found = false;
2323 
2324 	for (i = 0; i < ctrlr->changed_ns_list_count; i++) {
2325 		if (ctrlr->changed_ns_list.ns_list[i] == nsid) {
2326 			/* nsid is already in the list */
2327 			found = true;
2328 			break;
2329 		}
2330 	}
2331 
2332 	if (!found) {
2333 		if (ctrlr->changed_ns_list_count == max_changes) {
2334 			/* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. */
2335 			ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu;
2336 			for (i = 1; i < max_changes; i++) {
2337 				ctrlr->changed_ns_list.ns_list[i] = 0;
2338 			}
2339 		} else {
2340 			ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid;
2341 		}
2342 	}
2343 }
2344 
2345 static void
2346 nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2347 				  struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2348 {
2349 	size_t copy_length;
2350 	struct spdk_iov_xfer ix;
2351 
2352 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2353 
2354 	if (offset < sizeof(ctrlr->changed_ns_list)) {
2355 		copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset);
2356 		if (copy_length) {
2357 			spdk_iov_xfer_from_buf(&ix, (char *)&ctrlr->changed_ns_list + offset, copy_length);
2358 		}
2359 	}
2360 
2361 	/* Clear log page each time it is read */
2362 	ctrlr->changed_ns_list_count = 0;
2363 	memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list));
2364 
2365 	if (!rae) {
2366 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT);
2367 	}
2368 }
2369 
/*
 * Template for the Commands Supported and Effects log page, indexed by opcode.
 * nvmf_get_cmds_and_effects_log_page() copies this table and adjusts the copy
 * per subsystem before returning it to the host. Opcodes not listed here are
 * zero-initialized.
 *
 * The structure can be modified if we provide support for other commands in future
 *
 * NOTE(review): each entry has eight positional initializers while the column
 * comment below names six fields; confirm the remaining two against the
 * definition of struct spdk_nvme_cmds_and_effect_entry.
 */
static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = {
	.admin_cmds_supported = {
		/* CSUPP, LBCC, NCC, NIC, CCC, CSE */
		/* Get Log Page */
		[SPDK_NVME_OPC_GET_LOG_PAGE]		= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Identify */
		[SPDK_NVME_OPC_IDENTIFY]		= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Abort */
		[SPDK_NVME_OPC_ABORT]			= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Set Features */
		[SPDK_NVME_OPC_SET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Get Features */
		[SPDK_NVME_OPC_GET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Async Event Request */
		[SPDK_NVME_OPC_ASYNC_EVENT_REQUEST]	= {1, 0, 0, 0, 0, 0, 0, 0},
		/* Keep Alive */
		[SPDK_NVME_OPC_KEEP_ALIVE]		= {1, 0, 0, 0, 0, 0, 0, 0},
	},
	.io_cmds_supported = {
		/* FLUSH */
		[SPDK_NVME_OPC_FLUSH]			= {1, 1, 0, 0, 0, 0, 0, 0},
		/* WRITE */
		[SPDK_NVME_OPC_WRITE]			= {1, 1, 0, 0, 0, 0, 0, 0},
		/* READ */
		[SPDK_NVME_OPC_READ]			= {1, 0, 0, 0, 0, 0, 0, 0},
		/* WRITE ZEROES */
		[SPDK_NVME_OPC_WRITE_ZEROES]		= {1, 1, 0, 0, 0, 0, 0, 0},
		/* DATASET MANAGEMENT */
		[SPDK_NVME_OPC_DATASET_MANAGEMENT]	= {1, 1, 0, 0, 0, 0, 0, 0},
		/* COMPARE */
		[SPDK_NVME_OPC_COMPARE]			= {1, 0, 0, 0, 0, 0, 0, 0},
		/* ZONE MANAGEMENT SEND */
		[SPDK_NVME_OPC_ZONE_MGMT_SEND]		= {1, 1, 0, 0, 0, 0, 0, 0},
		/* ZONE MANAGEMENT RECEIVE */
		[SPDK_NVME_OPC_ZONE_MGMT_RECV]		= {1, 0, 0, 0, 0, 0, 0, 0},
	},
};
2408 
2409 static void
2410 nvmf_get_cmds_and_effects_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2411 				   uint64_t offset, uint32_t length)
2412 {
2413 	uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page);
2414 	size_t copy_len = 0;
2415 	struct spdk_nvme_cmds_and_effect_log_page cmds_and_effect_log_page = g_cmds_and_effect_log_page;
2416 	struct spdk_nvme_cmds_and_effect_entry csupp_and_lbcc_effect_entry = {1, 1, 0, 0, 0, 0, 0, 0};
2417 	struct spdk_iov_xfer ix;
2418 
2419 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2420 
2421 	if (offset < page_size) {
2422 		if (ctrlr->subsys->zone_append_supported) {
2423 			cmds_and_effect_log_page.io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND] =
2424 				csupp_and_lbcc_effect_entry;
2425 		}
2426 		copy_len = spdk_min(page_size - offset, length);
2427 		spdk_iov_xfer_from_buf(&ix, (char *)(&cmds_and_effect_log_page) + offset, copy_len);
2428 	}
2429 }
2430 
2431 static void
2432 nvmf_get_reservation_notification_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2433 		struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2434 {
2435 	uint32_t unit_log_len, avail_log_len, next_pos, copy_len;
2436 	struct spdk_nvmf_reservation_log *log, *log_tmp;
2437 	struct spdk_iov_xfer ix;
2438 
2439 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2440 
2441 	unit_log_len = sizeof(struct spdk_nvme_reservation_notification_log);
2442 	/* No available log, return zeroed log pages */
2443 	if (!ctrlr->num_avail_log_pages) {
2444 		return;
2445 	}
2446 
2447 	avail_log_len = ctrlr->num_avail_log_pages * unit_log_len;
2448 	if (offset >= avail_log_len) {
2449 		return;
2450 	}
2451 
2452 	next_pos = 0;
2453 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
2454 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
2455 		ctrlr->num_avail_log_pages--;
2456 
2457 		next_pos += unit_log_len;
2458 		if (next_pos > offset) {
2459 			copy_len = spdk_min(next_pos - offset, length);
2460 			spdk_iov_xfer_from_buf(&ix, &log->log, copy_len);
2461 			length -= copy_len;
2462 			offset += copy_len;
2463 		}
2464 		free(log);
2465 
2466 		if (length == 0) {
2467 			break;
2468 		}
2469 	}
2470 
2471 	if (!rae) {
2472 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT);
2473 	}
2474 	return;
2475 }
2476 
2477 static int
2478 nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req)
2479 {
2480 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2481 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2482 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2483 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
2484 	struct spdk_nvme_transport_id cmd_source_trid;
2485 	uint64_t offset, len;
2486 	uint32_t rae, numdl, numdu;
2487 	uint8_t lid;
2488 
2489 	if (req->iovcnt < 1) {
2490 		SPDK_DEBUGLOG(nvmf, "get log command with no buffer\n");
2491 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2492 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2493 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2494 	}
2495 
2496 	offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32);
2497 	if (offset & 3) {
2498 		SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset);
2499 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2500 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2501 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2502 	}
2503 
2504 	rae = cmd->cdw10_bits.get_log_page.rae;
2505 	numdl = cmd->cdw10_bits.get_log_page.numdl;
2506 	numdu = cmd->cdw11_bits.get_log_page.numdu;
2507 	len = ((numdu << 16) + numdl + (uint64_t)1) * 4;
2508 	if (len > req->length) {
2509 		SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n",
2510 			    len, req->length);
2511 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2512 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2513 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2514 	}
2515 
2516 	lid = cmd->cdw10_bits.get_log_page.lid;
2517 	SPDK_DEBUGLOG(nvmf, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 " rae=%u\n",
2518 		      lid, offset, len, rae);
2519 
2520 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2521 		switch (lid) {
2522 		case SPDK_NVME_LOG_DISCOVERY:
2523 			if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &cmd_source_trid)) {
2524 				SPDK_ERRLOG("Failed to get LOG_DISCOVERY source trid\n");
2525 				response->status.sct = SPDK_NVME_SCT_GENERIC;
2526 				response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2527 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2528 			}
2529 			nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn, req->iov, req->iovcnt,
2530 						    offset, len, &cmd_source_trid);
2531 			if (!rae) {
2532 				nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT);
2533 			}
2534 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2535 		default:
2536 			goto invalid_log_page;
2537 		}
2538 	} else {
2539 		if (offset > len) {
2540 			SPDK_ERRLOG("Get log page: offset (%" PRIu64 ") > len (%" PRIu64 ")\n",
2541 				    offset, len);
2542 			response->status.sct = SPDK_NVME_SCT_GENERIC;
2543 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2544 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2545 		}
2546 
2547 		switch (lid) {
2548 		case SPDK_NVME_LOG_ERROR:
2549 			nvmf_get_error_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2550 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2551 		case SPDK_NVME_LOG_HEALTH_INFORMATION:
2552 			/* TODO: actually fill out log page data */
2553 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2554 		case SPDK_NVME_LOG_FIRMWARE_SLOT:
2555 			nvmf_get_firmware_slot_log_page(req->iov, req->iovcnt, offset, len);
2556 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2557 		case SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS:
2558 			if (subsystem->flags.ana_reporting) {
2559 				nvmf_get_ana_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2560 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2561 			} else {
2562 				goto invalid_log_page;
2563 			}
2564 		case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG:
2565 			nvmf_get_cmds_and_effects_log_page(ctrlr, req->iov, req->iovcnt, offset, len);
2566 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2567 		case SPDK_NVME_LOG_CHANGED_NS_LIST:
2568 			nvmf_get_changed_ns_list_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2569 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2570 		case SPDK_NVME_LOG_RESERVATION_NOTIFICATION:
2571 			nvmf_get_reservation_notification_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2572 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2573 		default:
2574 			goto invalid_log_page;
2575 		}
2576 	}
2577 
2578 invalid_log_page:
2579 	SPDK_INFOLOG(nvmf, "Unsupported Get Log Page 0x%02X\n", lid);
2580 	response->status.sct = SPDK_NVME_SCT_GENERIC;
2581 	response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2582 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2583 }
2584 
2585 static struct spdk_nvmf_ns *
2586 _nvmf_subsystem_get_ns_safe(struct spdk_nvmf_subsystem *subsystem,
2587 			    uint32_t nsid,
2588 			    struct spdk_nvme_cpl *rsp)
2589 {
2590 	struct spdk_nvmf_ns *ns;
2591 	if (nsid == 0 || nsid > subsystem->max_nsid) {
2592 		SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", nsid);
2593 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2594 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2595 		return NULL;
2596 	}
2597 
2598 	ns = _nvmf_subsystem_get_ns(subsystem, nsid);
2599 	if (ns == NULL || ns->bdev == NULL) {
2600 		/*
2601 		 * Inactive namespaces should return a zero filled data structure.
2602 		 * The data buffer is already zeroed by nvmf_ctrlr_process_admin_cmd(),
2603 		 * so we can just return early here.
2604 		 */
2605 		SPDK_DEBUGLOG(nvmf, "Identify Namespace for inactive NSID %u\n", nsid);
2606 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2607 		rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2608 		return NULL;
2609 	}
2610 	return ns;
2611 }
2612 
2613 int
2614 spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
2615 			    struct spdk_nvme_cmd *cmd,
2616 			    struct spdk_nvme_cpl *rsp,
2617 			    struct spdk_nvme_ns_data *nsdata)
2618 {
2619 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2620 	struct spdk_nvmf_ns *ns;
2621 	uint32_t max_num_blocks, format_index;
2622 	enum spdk_nvme_ana_state ana_state;
2623 
2624 	ns = _nvmf_subsystem_get_ns_safe(subsystem, cmd->nsid, rsp);
2625 	if (ns == NULL) {
2626 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2627 	}
2628 
2629 	nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
2630 
2631 	assert(ctrlr->admin_qpair);
2632 
2633 	format_index = spdk_nvme_ns_get_format_index(nsdata);
2634 
2635 	/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
2636 	max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
2637 			 (1U << nsdata->lbaf[format_index].lbads);
2638 	if (nsdata->noiob > max_num_blocks) {
2639 		nsdata->noiob = max_num_blocks;
2640 	}
2641 
2642 	/* Set NOWS equal to Controller MDTS */
2643 	if (nsdata->nsfeat.optperf) {
2644 		nsdata->nows = max_num_blocks - 1;
2645 	}
2646 
2647 	if (subsystem->flags.ana_reporting) {
2648 		assert(ns->anagrpid - 1 < subsystem->max_nsid);
2649 		nsdata->anagrpid = ns->anagrpid;
2650 
2651 		ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2652 		if (ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE ||
2653 		    ana_state == SPDK_NVME_ANA_PERSISTENT_LOSS_STATE) {
2654 			nsdata->nuse = 0;
2655 		}
2656 	}
2657 
2658 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2659 }
2660 
2661 static void
2662 nvmf_ctrlr_populate_oacs(struct spdk_nvmf_ctrlr *ctrlr,
2663 			 struct spdk_nvme_ctrlr_data *cdata)
2664 {
2665 	cdata->oacs = ctrlr->cdata.oacs;
2666 
2667 	cdata->oacs.virtualization_management =
2668 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT].hdlr != NULL;
2669 	cdata->oacs.nvme_mi = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_SEND].hdlr != NULL
2670 			      && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_RECEIVE].hdlr != NULL;
2671 	cdata->oacs.directives = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_SEND].hdlr != NULL
2672 				 && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_RECEIVE].hdlr != NULL;
2673 	cdata->oacs.device_self_test =
2674 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DEVICE_SELF_TEST].hdlr != NULL;
2675 	cdata->oacs.ns_manage = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_MANAGEMENT].hdlr != NULL
2676 				&& g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_ATTACHMENT].hdlr != NULL;
2677 	cdata->oacs.firmware = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD].hdlr !=
2678 			       NULL
2679 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_COMMIT].hdlr != NULL;
2680 	cdata->oacs.format =
2681 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FORMAT_NVM].hdlr != NULL;
2682 	cdata->oacs.security = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_SEND].hdlr != NULL
2683 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_RECEIVE].hdlr != NULL;
2684 	cdata->oacs.get_lba_status = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_GET_LBA_STATUS].hdlr !=
2685 				     NULL;
2686 }
2687 
2688 int
2689 spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
2690 {
2691 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2692 	struct spdk_nvmf_transport *transport;
2693 
2694 	/*
2695 	 * Common fields for discovery and NVM subsystems
2696 	 */
2697 	assert(ctrlr->admin_qpair);
2698 	transport = ctrlr->admin_qpair->transport;
2699 	spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
2700 	assert((transport->opts.max_io_size % 4096) == 0);
2701 	cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);
2702 	cdata->cntlid = ctrlr->cntlid;
2703 	cdata->ver = ctrlr->vcprop.vs;
2704 	cdata->aerl = ctrlr->cdata.aerl;
2705 	cdata->lpa.edlp = 1;
2706 	cdata->elpe = 127;
2707 	cdata->maxcmd = transport->opts.max_queue_depth;
2708 	cdata->sgls = ctrlr->cdata.sgls;
2709 	cdata->fuses = ctrlr->cdata.fuses;
2710 	cdata->acwu = 0; /* ACWU is 0-based. */
2711 	if (subsystem->flags.ana_reporting) {
2712 		cdata->mnan = subsystem->max_nsid;
2713 	}
2714 	spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0');
2715 
2716 	SPDK_DEBUGLOG(nvmf, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd);
2717 	SPDK_DEBUGLOG(nvmf, "sgls data: 0x%x\n", from_le32(&cdata->sgls));
2718 
2719 
2720 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2721 		/*
2722 		 * NVM Discovery subsystem fields
2723 		 */
2724 		cdata->oaes.discovery_log_change_notices = 1;
2725 	} else {
2726 		cdata->vid = ctrlr->cdata.vid;
2727 		cdata->ssvid = ctrlr->cdata.ssvid;
2728 		cdata->ieee[0] = ctrlr->cdata.ieee[0];
2729 		cdata->ieee[1] = ctrlr->cdata.ieee[1];
2730 		cdata->ieee[2] = ctrlr->cdata.ieee[2];
2731 
2732 		/*
2733 		 * NVM subsystem fields (reserved for discovery subsystems)
2734 		 */
2735 		spdk_strcpy_pad(cdata->mn, spdk_nvmf_subsystem_get_mn(subsystem), sizeof(cdata->mn), ' ');
2736 		spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
2737 		cdata->kas = ctrlr->cdata.kas;
2738 
2739 		cdata->rab = 6;
2740 		cdata->cmic.multi_port = 1;
2741 		cdata->cmic.multi_ctrlr = 1;
2742 		cdata->oaes.ns_attribute_notices = 1;
2743 		cdata->ctratt.host_id_exhid_supported = 1;
2744 		/* We do not have any actual limitation to the number of abort commands.
2745 		 * We follow the recommendation by the NVMe specification.
2746 		 */
2747 		cdata->acl = NVMF_ABORT_COMMAND_LIMIT;
2748 		cdata->frmw.slot1_ro = 1;
2749 		cdata->frmw.num_slots = 1;
2750 
2751 		cdata->lpa.celp = 1; /* Command Effects log page supported */
2752 
2753 		cdata->sqes.min = 6;
2754 		cdata->sqes.max = 6;
2755 		cdata->cqes.min = 4;
2756 		cdata->cqes.max = 4;
2757 		cdata->nn = subsystem->max_nsid;
2758 		cdata->vwc.present = 1;
2759 		cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED;
2760 
2761 		cdata->nvmf_specific = ctrlr->cdata.nvmf_specific;
2762 
2763 		cdata->oncs.compare = ctrlr->cdata.oncs.compare;
2764 		cdata->oncs.dsm = nvmf_ctrlr_dsm_supported(ctrlr);
2765 		cdata->oncs.write_zeroes = nvmf_ctrlr_write_zeroes_supported(ctrlr);
2766 		cdata->oncs.reservations = ctrlr->cdata.oncs.reservations;
2767 		cdata->oncs.copy = nvmf_ctrlr_copy_supported(ctrlr);
2768 		cdata->ocfs.copy_format0 = cdata->oncs.copy;
2769 		if (subsystem->flags.ana_reporting) {
2770 			/* Asymmetric Namespace Access Reporting is supported. */
2771 			cdata->cmic.ana_reporting = 1;
2772 			cdata->oaes.ana_change_notices = 1;
2773 
2774 			cdata->anatt = ANA_TRANSITION_TIME_IN_SEC;
2775 			/* ANA Change state is not used, and ANA Persistent Loss state
2776 			 * is not supported for now.
2777 			 */
2778 			cdata->anacap.ana_optimized_state = 1;
2779 			cdata->anacap.ana_non_optimized_state = 1;
2780 			cdata->anacap.ana_inaccessible_state = 1;
2781 			/* ANAGRPID does not change while namespace is attached to controller */
2782 			cdata->anacap.no_change_anagrpid = 1;
2783 			cdata->anagrpmax = subsystem->max_nsid;
2784 			cdata->nanagrpid = subsystem->max_nsid;
2785 		}
2786 
2787 		nvmf_ctrlr_populate_oacs(ctrlr, cdata);
2788 
2789 		assert(subsystem->tgt != NULL);
2790 		cdata->crdt[0] = subsystem->tgt->crdt[0];
2791 		cdata->crdt[1] = subsystem->tgt->crdt[1];
2792 		cdata->crdt[2] = subsystem->tgt->crdt[2];
2793 
2794 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ioccsz 0x%x\n",
2795 			      cdata->nvmf_specific.ioccsz);
2796 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: iorcsz 0x%x\n",
2797 			      cdata->nvmf_specific.iorcsz);
2798 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: icdoff 0x%x\n",
2799 			      cdata->nvmf_specific.icdoff);
2800 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ctrattr 0x%x\n",
2801 			      *(uint8_t *)&cdata->nvmf_specific.ctrattr);
2802 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: msdbd 0x%x\n",
2803 			      cdata->nvmf_specific.msdbd);
2804 	}
2805 
2806 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2807 }
2808 
2809 static int
2810 nvmf_ns_identify_iocs_zns(struct spdk_nvmf_ns *ns,
2811 			  struct spdk_nvme_cmd *cmd,
2812 			  struct spdk_nvme_cpl *rsp,
2813 			  struct spdk_nvme_zns_ns_data *nsdata_zns)
2814 {
2815 	nsdata_zns->zoc.variable_zone_capacity = 0;
2816 	nsdata_zns->zoc.zone_active_excursions = 0;
2817 	nsdata_zns->ozcs.read_across_zone_boundaries = 1;
2818 	/* Underflowing the zero based mar and mor bdev helper results in the correct
2819 	   value of FFFFFFFFh. */
2820 	nsdata_zns->mar = spdk_bdev_get_max_active_zones(ns->bdev) - 1;
2821 	nsdata_zns->mor = spdk_bdev_get_max_open_zones(ns->bdev) - 1;
2822 	nsdata_zns->rrl = 0;
2823 	nsdata_zns->frl = 0;
2824 	nsdata_zns->lbafe[0].zsze = spdk_bdev_get_zone_size(ns->bdev);
2825 
2826 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2827 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2828 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2829 }
2830 
2831 int
2832 spdk_nvmf_ns_identify_iocs_specific(struct spdk_nvmf_ctrlr *ctrlr,
2833 				    struct spdk_nvme_cmd *cmd,
2834 				    struct spdk_nvme_cpl *rsp,
2835 				    void *nsdata,
2836 				    size_t nsdata_size)
2837 {
2838 	uint8_t csi = cmd->cdw11_bits.identify.csi;
2839 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2840 	struct spdk_nvmf_ns *ns = _nvmf_subsystem_get_ns_safe(subsystem, cmd->nsid, rsp);
2841 
2842 	memset(nsdata, 0, nsdata_size);
2843 
2844 	if (ns == NULL) {
2845 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2846 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2847 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2848 	}
2849 
2850 	switch (csi) {
2851 	case SPDK_NVME_CSI_ZNS:
2852 		return nvmf_ns_identify_iocs_zns(ns, cmd, rsp, nsdata);
2853 	default:
2854 		break;
2855 	}
2856 
2857 	SPDK_DEBUGLOG(nvmf,
2858 		      "Returning zero filled struct for the iocs specific ns "
2859 		      "identify command and CSI 0x%02x\n",
2860 		      csi);
2861 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2862 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2863 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2864 }
2865 
2866 static int
2867 nvmf_ctrlr_identify_iocs_zns(struct spdk_nvmf_ctrlr *ctrlr,
2868 			     struct spdk_nvme_cmd *cmd,
2869 			     struct spdk_nvme_cpl *rsp,
2870 			     struct spdk_nvme_zns_ctrlr_data *cdata_zns)
2871 {
2872 	/* The unit of max_zone_append_size_kib is KiB.
2873 	The unit of zasl is the minimum memory page size
2874 	(2 ^ (12 + CAP.MPSMIN) KiB)
2875 	and is reported as a power of two (2^n). */
2876 	cdata_zns->zasl = spdk_u64log2(ctrlr->subsys->max_zone_append_size_kib >>
2877 				       (12 + ctrlr->vcprop.cap.bits.mpsmin));
2878 
2879 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2880 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2881 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2882 }
2883 
2884 int
2885 spdk_nvmf_ctrlr_identify_iocs_specific(struct spdk_nvmf_ctrlr *ctrlr,
2886 				       struct spdk_nvme_cmd *cmd,
2887 				       struct spdk_nvme_cpl *rsp,
2888 				       void *cdata,
2889 				       size_t cdata_size)
2890 {
2891 	uint8_t csi = cmd->cdw11_bits.identify.csi;
2892 
2893 	memset(cdata, 0, cdata_size);
2894 
2895 	switch (csi) {
2896 	case SPDK_NVME_CSI_ZNS:
2897 		return nvmf_ctrlr_identify_iocs_zns(ctrlr, cmd, rsp, cdata);
2898 	default:
2899 		break;
2900 	}
2901 
2902 	SPDK_DEBUGLOG(nvmf,
2903 		      "Returning zero filled struct for the iocs specific ctrlr "
2904 		      "identify command and CSI 0x%02x\n",
2905 		      csi);
2906 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2907 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2908 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2909 }
2910 
2911 static int
2912 nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem,
2913 				   struct spdk_nvme_cmd *cmd,
2914 				   struct spdk_nvme_cpl *rsp,
2915 				   struct spdk_nvme_ns_list *ns_list)
2916 {
2917 	struct spdk_nvmf_ns *ns;
2918 	uint32_t count = 0;
2919 
2920 	if (cmd->nsid >= 0xfffffffeUL) {
2921 		SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid);
2922 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2923 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2924 	}
2925 
2926 	memset(ns_list, 0, sizeof(*ns_list));
2927 
2928 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
2929 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
2930 		if (ns->opts.nsid <= cmd->nsid) {
2931 			continue;
2932 		}
2933 
2934 		ns_list->ns_list[count++] = ns->opts.nsid;
2935 		if (count == SPDK_COUNTOF(ns_list->ns_list)) {
2936 			break;
2937 		}
2938 	}
2939 
2940 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2941 }
2942 
2943 static void
2944 _add_ns_id_desc(void **buf_ptr, size_t *buf_remain,
2945 		enum spdk_nvme_nidt type,
2946 		const void *data, size_t data_size)
2947 {
2948 	struct spdk_nvme_ns_id_desc *desc;
2949 	size_t desc_size = sizeof(*desc) + data_size;
2950 
2951 	/*
2952 	 * These should never fail in practice, since all valid NS ID descriptors
2953 	 * should be defined so that they fit in the available 4096-byte buffer.
2954 	 */
2955 	assert(data_size > 0);
2956 	assert(data_size <= UINT8_MAX);
2957 	assert(desc_size < *buf_remain);
2958 	if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) {
2959 		return;
2960 	}
2961 
2962 	desc = *buf_ptr;
2963 	desc->nidt = type;
2964 	desc->nidl = data_size;
2965 	memcpy(desc->nid, data, data_size);
2966 
2967 	*buf_ptr += desc_size;
2968 	*buf_remain -= desc_size;
2969 }
2970 
2971 static int
2972 nvmf_ctrlr_identify_ns_id_descriptor_list(
2973 	struct spdk_nvmf_subsystem *subsystem,
2974 	struct spdk_nvme_cmd *cmd,
2975 	struct spdk_nvme_cpl *rsp,
2976 	void *id_desc_list, size_t id_desc_list_size)
2977 {
2978 	struct spdk_nvmf_ns *ns;
2979 	size_t buf_remain = id_desc_list_size;
2980 	void *buf_ptr = id_desc_list;
2981 
2982 	ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
2983 	if (ns == NULL || ns->bdev == NULL) {
2984 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2985 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2986 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2987 	}
2988 
2989 #define ADD_ID_DESC(type, data, size) \
2990 	do { \
2991 		if (!spdk_mem_all_zero(data, size)) { \
2992 			_add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \
2993 		} \
2994 	} while (0)
2995 
2996 	ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64));
2997 	ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid));
2998 	ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid));
2999 	ADD_ID_DESC(SPDK_NVME_NIDT_CSI, &ns->csi, sizeof(uint8_t));
3000 
3001 	/*
3002 	 * The list is automatically 0-terminated, both in the temporary buffer
3003 	 * used by nvmf_ctrlr_identify(), and the eventual iov destination -
3004 	 * controller to host buffers in admin commands always get zeroed in
3005 	 * nvmf_ctrlr_process_admin_cmd().
3006 	 */
3007 
3008 #undef ADD_ID_DESC
3009 
3010 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3011 }
3012 
3013 static int
3014 nvmf_ctrlr_identify(struct spdk_nvmf_request *req)
3015 {
3016 	uint8_t cns;
3017 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3018 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3019 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3020 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
3021 	int ret = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3022 	char tmpbuf[SPDK_NVME_IDENTIFY_BUFLEN] = "";
3023 	struct spdk_iov_xfer ix;
3024 
3025 	if (req->iovcnt < 1 || req->length < SPDK_NVME_IDENTIFY_BUFLEN) {
3026 		SPDK_DEBUGLOG(nvmf, "identify command with invalid buffer\n");
3027 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3028 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3029 		return ret;
3030 	}
3031 
3032 	cns = cmd->cdw10_bits.identify.cns;
3033 
3034 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY &&
3035 	    cns != SPDK_NVME_IDENTIFY_CTRLR) {
3036 		/* Discovery controllers only support Identify Controller */
3037 		goto invalid_cns;
3038 	}
3039 
3040 	/*
3041 	 * We must use a temporary buffer: it's entirely possible the out buffer
3042 	 * is split across more than one IOV.
3043 	 */
3044 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
3045 
3046 	SPDK_DEBUGLOG(nvmf, "Received identify command with CNS 0x%02x\n", cns);
3047 
3048 	switch (cns) {
3049 	case SPDK_NVME_IDENTIFY_NS:
3050 		ret = spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, (void *)&tmpbuf);
3051 		break;
3052 	case SPDK_NVME_IDENTIFY_CTRLR:
3053 		ret = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, (void *)&tmpbuf);
3054 		break;
3055 	case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST:
3056 		ret = nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, (void *)&tmpbuf);
3057 		break;
3058 	case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST:
3059 		ret = nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp,
3060 				tmpbuf, req->length);
3061 		break;
3062 	case SPDK_NVME_IDENTIFY_NS_IOCS:
3063 		ret = spdk_nvmf_ns_identify_iocs_specific(ctrlr, cmd, rsp, (void *)&tmpbuf, req->length);
3064 		break;
3065 	case SPDK_NVME_IDENTIFY_CTRLR_IOCS:
3066 		ret = spdk_nvmf_ctrlr_identify_iocs_specific(ctrlr, cmd, rsp, (void *)&tmpbuf, req->length);
3067 		break;
3068 	default:
3069 		goto invalid_cns;
3070 	}
3071 
3072 	if (ret == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3073 		spdk_iov_xfer_from_buf(&ix, tmpbuf, sizeof(tmpbuf));
3074 	}
3075 
3076 	return ret;
3077 
3078 invalid_cns:
3079 	SPDK_DEBUGLOG(nvmf, "Identify command with unsupported CNS 0x%02x\n", cns);
3080 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3081 	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3082 	return ret;
3083 }
3084 
3085 static bool
3086 nvmf_qpair_abort_aer(struct spdk_nvmf_qpair *qpair, uint16_t cid)
3087 {
3088 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
3089 	struct spdk_nvmf_request *req;
3090 	int i;
3091 
3092 	if (!nvmf_qpair_is_admin_queue(qpair)) {
3093 		return false;
3094 	}
3095 
3096 	assert(spdk_get_thread() == ctrlr->thread);
3097 
3098 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3099 		if (ctrlr->aer_req[i]->cmd->nvme_cmd.cid == cid) {
3100 			SPDK_DEBUGLOG(nvmf, "Aborting AER request\n");
3101 			req = ctrlr->aer_req[i];
3102 			ctrlr->aer_req[i] = NULL;
3103 			ctrlr->nr_aer_reqs--;
3104 
3105 			/* Move the last req to the aborting position for making aer_reqs
3106 			 * in continuous
3107 			 */
3108 			if (i < ctrlr->nr_aer_reqs) {
3109 				ctrlr->aer_req[i] = ctrlr->aer_req[ctrlr->nr_aer_reqs];
3110 				ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
3111 			}
3112 
3113 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3114 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3115 			_nvmf_request_complete(req);
3116 			return true;
3117 		}
3118 	}
3119 
3120 	return false;
3121 }
3122 
3123 void
3124 nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair)
3125 {
3126 	struct spdk_nvmf_request *req, *tmp;
3127 
3128 	TAILQ_FOREACH_SAFE(req, &qpair->outstanding, link, tmp) {
3129 		if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
3130 			/* Zero-copy requests are kept on the outstanding queue from the moment
3131 			 * zcopy_start is sent until a zcopy_end callback is received.  Therefore,
3132 			 * we can't remove them from the outstanding queue here, but need to rely on
3133 			 * the transport to do a zcopy_end to release their buffers and, in turn,
3134 			 * remove them from the queue.
3135 			 */
3136 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3137 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3138 			nvmf_transport_req_free(req);
3139 		}
3140 	}
3141 }
3142 
3143 static void
3144 nvmf_qpair_abort_request(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_request *req)
3145 {
3146 	uint16_t cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
3147 
3148 	if (nvmf_qpair_abort_aer(qpair, cid)) {
3149 		SPDK_DEBUGLOG(nvmf, "abort ctrlr=%p sqid=%u cid=%u successful\n",
3150 			      qpair->ctrlr, qpair->qid, cid);
3151 		req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command successfully aborted */
3152 
3153 		spdk_nvmf_request_complete(req);
3154 		return;
3155 	}
3156 
3157 	nvmf_transport_qpair_abort_request(qpair, req);
3158 }
3159 
/*
 * Completion callback of the poll group iteration started by
 * nvmf_ctrlr_abort().
 *
 * A zero status means the iteration ran through every poll group without
 * finding a qpair matching the Abort command's SQID, so the Abort request is
 * completed here. A non-zero status means a poll group took over the request.
 */
static void
nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_nvmf_request *abort_req = spdk_io_channel_iter_get_ctx(i);

	if (status != 0) {
		return;
	}

	_nvmf_request_complete(abort_req);
}
3172 
3173 static void
3174 nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i)
3175 {
3176 	struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
3177 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
3178 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
3179 	uint16_t sqid = req->cmd->nvme_cmd.cdw10_bits.abort.sqid;
3180 	struct spdk_nvmf_qpair *qpair;
3181 
3182 	TAILQ_FOREACH(qpair, &group->qpairs, link) {
3183 		if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) {
3184 			/* Found the qpair */
3185 
3186 			nvmf_qpair_abort_request(qpair, req);
3187 
3188 			/* Return -1 for the status so the iteration across threads stops. */
3189 			spdk_for_each_channel_continue(i, -1);
3190 			return;
3191 		}
3192 	}
3193 
3194 	spdk_for_each_channel_continue(i, 0);
3195 }
3196 
3197 static int
3198 nvmf_ctrlr_abort(struct spdk_nvmf_request *req)
3199 {
3200 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3201 
3202 	rsp->cdw0 = 1U; /* Command not aborted */
3203 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3204 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
3205 
3206 	/* Send a message to each poll group, searching for this ctrlr, sqid, and command. */
3207 	spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt,
3208 			      nvmf_ctrlr_abort_on_pg,
3209 			      req,
3210 			      nvmf_ctrlr_abort_done
3211 			     );
3212 
3213 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
3214 }
3215 
3216 int
3217 nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req)
3218 {
3219 	struct spdk_nvmf_request *req_to_abort = req->req_to_abort;
3220 	struct spdk_bdev *bdev;
3221 	struct spdk_bdev_desc *desc;
3222 	struct spdk_io_channel *ch;
3223 	int rc;
3224 
3225 	assert(req_to_abort != NULL);
3226 
3227 	if (g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr &&
3228 	    nvmf_qpair_is_admin_queue(req_to_abort->qpair)) {
3229 		return g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr(req);
3230 	}
3231 
3232 	rc = spdk_nvmf_request_get_bdev(req_to_abort->cmd->nvme_cmd.nsid, req_to_abort,
3233 					&bdev, &desc, &ch);
3234 	if (rc != 0) {
3235 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3236 	}
3237 
3238 	return spdk_nvmf_bdev_ctrlr_abort_cmd(bdev, desc, ch, req, req_to_abort);
3239 }
3240 
3241 static int
3242 get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0)
3243 {
3244 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3245 
3246 	rsp->cdw0 = cdw0;
3247 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3248 }
3249 
3250 /* we have to use the typedef in the function declaration to appease astyle. */
3251 typedef enum spdk_nvme_path_status_code spdk_nvme_path_status_code_t;
3252 
3253 static spdk_nvme_path_status_code_t
3254 _nvme_ana_state_to_path_status(enum spdk_nvme_ana_state ana_state)
3255 {
3256 	switch (ana_state) {
3257 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3258 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE;
3259 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3260 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3261 	case SPDK_NVME_ANA_CHANGE_STATE:
3262 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_TRANSITION;
3263 	default:
3264 		return SPDK_NVME_SC_INTERNAL_PATH_ERROR;
3265 	}
3266 }
3267 
3268 static int
3269 nvmf_ctrlr_get_features(struct spdk_nvmf_request *req)
3270 {
3271 	uint8_t feature;
3272 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3273 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3274 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3275 	enum spdk_nvme_ana_state ana_state;
3276 
3277 	feature = cmd->cdw10_bits.get_features.fid;
3278 
3279 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3280 		/*
3281 		 * Features supported by Discovery controller
3282 		 */
3283 		switch (feature) {
3284 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3285 			return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3286 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3287 			return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3288 		default:
3289 			SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3290 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3291 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3292 		}
3293 	}
3294 	/*
3295 	 * Process Get Features command for non-discovery controller
3296 	 */
3297 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3298 	switch (ana_state) {
3299 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3300 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3301 	case SPDK_NVME_ANA_CHANGE_STATE:
3302 		switch (feature) {
3303 		case SPDK_NVME_FEAT_ERROR_RECOVERY:
3304 		case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3305 		case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3306 		case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3307 			response->status.sct = SPDK_NVME_SCT_PATH;
3308 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3309 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3310 		default:
3311 			break;
3312 		}
3313 		break;
3314 	default:
3315 		break;
3316 	}
3317 
3318 	switch (feature) {
3319 	case SPDK_NVME_FEAT_ARBITRATION:
3320 		return get_features_generic(req, ctrlr->feat.arbitration.raw);
3321 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3322 		return get_features_generic(req, ctrlr->feat.power_management.raw);
3323 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3324 		return nvmf_ctrlr_get_features_temperature_threshold(req);
3325 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3326 		return get_features_generic(req, ctrlr->feat.error_recovery.raw);
3327 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3328 		return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw);
3329 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3330 		return get_features_generic(req, ctrlr->feat.number_of_queues.raw);
3331 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3332 		return get_features_generic(req, ctrlr->feat.interrupt_coalescing.raw);
3333 	case SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
3334 		return nvmf_ctrlr_get_features_interrupt_vector_configuration(req);
3335 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3336 		return get_features_generic(req, ctrlr->feat.write_atomicity.raw);
3337 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3338 		return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3339 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3340 		return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3341 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3342 		return nvmf_ctrlr_get_features_host_identifier(req);
3343 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3344 		return nvmf_ctrlr_get_features_reservation_notification_mask(req);
3345 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3346 		return nvmf_ctrlr_get_features_reservation_persistence(req);
3347 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3348 		return nvmf_ctrlr_get_features_host_behavior_support(req);
3349 	default:
3350 		SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3351 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3352 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3353 	}
3354 }
3355 
3356 static int
3357 nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
3358 {
3359 	uint8_t feature, save;
3360 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3361 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3362 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3363 	enum spdk_nvme_ana_state ana_state;
3364 	/*
3365 	 * Features are not saveable by the controller as indicated by
3366 	 * ONCS field of the Identify Controller data.
3367 	 * */
3368 	save = cmd->cdw10_bits.set_features.sv;
3369 	if (save) {
3370 		response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
3371 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3372 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3373 	}
3374 
3375 	feature = cmd->cdw10_bits.set_features.fid;
3376 
3377 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3378 		/*
3379 		 * Features supported by Discovery controller
3380 		 */
3381 		switch (feature) {
3382 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3383 			return nvmf_ctrlr_set_features_keep_alive_timer(req);
3384 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3385 			return nvmf_ctrlr_set_features_async_event_configuration(req);
3386 		default:
3387 			SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3388 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3389 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3390 		}
3391 	}
3392 	/*
3393 	 * Process Set Features command for non-discovery controller
3394 	 */
3395 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3396 	switch (ana_state) {
3397 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3398 	case SPDK_NVME_ANA_CHANGE_STATE:
3399 		if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
3400 			response->status.sct = SPDK_NVME_SCT_PATH;
3401 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3402 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3403 		} else {
3404 			switch (feature) {
3405 			case SPDK_NVME_FEAT_ERROR_RECOVERY:
3406 			case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3407 			case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3408 			case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3409 				response->status.sct = SPDK_NVME_SCT_PATH;
3410 				response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3411 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3412 			default:
3413 				break;
3414 			}
3415 		}
3416 		break;
3417 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3418 		response->status.sct = SPDK_NVME_SCT_PATH;
3419 		response->status.sc = SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3420 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3421 	default:
3422 		break;
3423 	}
3424 
3425 	switch (feature) {
3426 	case SPDK_NVME_FEAT_ARBITRATION:
3427 		return nvmf_ctrlr_set_features_arbitration(req);
3428 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3429 		return nvmf_ctrlr_set_features_power_management(req);
3430 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3431 		return nvmf_ctrlr_set_features_temperature_threshold(req);
3432 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3433 		return nvmf_ctrlr_set_features_error_recovery(req);
3434 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3435 		return nvmf_ctrlr_set_features_volatile_write_cache(req);
3436 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3437 		return nvmf_ctrlr_set_features_number_of_queues(req);
3438 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3439 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3440 		response->status.sc = SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE;
3441 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3442 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3443 		return nvmf_ctrlr_set_features_write_atomicity(req);
3444 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3445 		return nvmf_ctrlr_set_features_async_event_configuration(req);
3446 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3447 		return nvmf_ctrlr_set_features_keep_alive_timer(req);
3448 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3449 		return nvmf_ctrlr_set_features_host_identifier(req);
3450 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3451 		return nvmf_ctrlr_set_features_reservation_notification_mask(req);
3452 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3453 		return nvmf_ctrlr_set_features_reservation_persistence(req);
3454 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3455 		return nvmf_ctrlr_set_features_host_behavior_support(req);
3456 	default:
3457 		SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3458 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3459 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3460 	}
3461 }
3462 
3463 static int
3464 nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
3465 {
3466 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3467 
3468 	SPDK_DEBUGLOG(nvmf, "Keep Alive\n");
3469 	/*
3470 	 * To handle keep alive just clear or reset the
3471 	 * ctrlr based keep alive duration counter.
3472 	 * When added, a separate timer based process
3473 	 * will monitor if the time since last recorded
3474 	 * keep alive has exceeded the max duration and
3475 	 * take appropriate action.
3476 	 */
3477 	ctrlr->last_keep_alive_tick = spdk_get_ticks();
3478 
3479 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3480 }
3481 
3482 int
3483 nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
3484 {
3485 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3486 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3487 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3488 	struct spdk_nvmf_subsystem_poll_group *sgroup;
3489 	int rc;
3490 
3491 	if (cmd->opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
3492 		/* We do not want to treat AERs as outstanding commands,
3493 		 * so decrement mgmt_io_outstanding here to offset
3494 		 * the increment that happened prior to this call.
3495 		 */
3496 		sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
3497 		assert(sgroup != NULL);
3498 		sgroup->mgmt_io_outstanding--;
3499 	}
3500 
3501 	if (ctrlr == NULL) {
3502 		SPDK_ERRLOG("Admin command sent before CONNECT\n");
3503 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3504 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3505 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3506 	}
3507 
3508 	assert(spdk_get_thread() == ctrlr->thread);
3509 
3510 	if (cmd->fuse != 0) {
3511 		/* Fused admin commands are not supported. */
3512 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3513 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3514 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3515 	}
3516 
3517 	if (ctrlr->vcprop.cc.bits.en != 1) {
3518 		SPDK_ERRLOG("Admin command sent to disabled controller\n");
3519 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3520 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3521 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3522 	}
3523 
3524 	if (req->iovcnt && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
3525 		spdk_iov_memset(req->iov, req->iovcnt, 0);
3526 	}
3527 
3528 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3529 		/* Discovery controllers only support these admin OPS. */
3530 		switch (cmd->opc) {
3531 		case SPDK_NVME_OPC_IDENTIFY:
3532 		case SPDK_NVME_OPC_GET_LOG_PAGE:
3533 		case SPDK_NVME_OPC_KEEP_ALIVE:
3534 		case SPDK_NVME_OPC_SET_FEATURES:
3535 		case SPDK_NVME_OPC_GET_FEATURES:
3536 		case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3537 			break;
3538 		default:
3539 			goto invalid_opcode;
3540 		}
3541 	}
3542 
3543 	/* Call a custom adm cmd handler if set. Aborts are handled in a different path (see nvmf_passthru_admin_cmd) */
3544 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) {
3545 		rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req);
3546 		if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3547 			/* The handler took care of this command */
3548 			return rc;
3549 		}
3550 	}
3551 
3552 	switch (cmd->opc) {
3553 	case SPDK_NVME_OPC_GET_LOG_PAGE:
3554 		return nvmf_ctrlr_get_log_page(req);
3555 	case SPDK_NVME_OPC_IDENTIFY:
3556 		return nvmf_ctrlr_identify(req);
3557 	case SPDK_NVME_OPC_ABORT:
3558 		return nvmf_ctrlr_abort(req);
3559 	case SPDK_NVME_OPC_GET_FEATURES:
3560 		return nvmf_ctrlr_get_features(req);
3561 	case SPDK_NVME_OPC_SET_FEATURES:
3562 		return nvmf_ctrlr_set_features(req);
3563 	case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3564 		return nvmf_ctrlr_async_event_request(req);
3565 	case SPDK_NVME_OPC_KEEP_ALIVE:
3566 		return nvmf_ctrlr_keep_alive(req);
3567 
3568 	case SPDK_NVME_OPC_CREATE_IO_SQ:
3569 	case SPDK_NVME_OPC_CREATE_IO_CQ:
3570 	case SPDK_NVME_OPC_DELETE_IO_SQ:
3571 	case SPDK_NVME_OPC_DELETE_IO_CQ:
3572 		/* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
3573 		goto invalid_opcode;
3574 
3575 	default:
3576 		goto invalid_opcode;
3577 	}
3578 
3579 invalid_opcode:
3580 	SPDK_INFOLOG(nvmf, "Unsupported admin opcode 0x%x\n", cmd->opc);
3581 	response->status.sct = SPDK_NVME_SCT_GENERIC;
3582 	response->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3583 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3584 }
3585 
3586 static int
3587 nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
3588 {
3589 	struct spdk_nvmf_qpair *qpair = req->qpair;
3590 	struct spdk_nvmf_capsule_cmd *cap_hdr;
3591 
3592 	cap_hdr = &req->cmd->nvmf_cmd;
3593 
3594 	if (qpair->ctrlr == NULL) {
3595 		/* No ctrlr established yet; the only valid command is Connect */
3596 		if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
3597 			return nvmf_ctrlr_cmd_connect(req);
3598 		} else {
3599 			SPDK_DEBUGLOG(nvmf, "Got fctype 0x%x, expected Connect\n",
3600 				      cap_hdr->fctype);
3601 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3602 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3603 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3604 		}
3605 	} else if (nvmf_qpair_is_admin_queue(qpair)) {
3606 		/*
3607 		 * Controller session is established, and this is an admin queue.
3608 		 * Disallow Connect and allow other fabrics commands.
3609 		 */
3610 		switch (cap_hdr->fctype) {
3611 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
3612 			return nvmf_property_set(req);
3613 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
3614 			return nvmf_property_get(req);
3615 		default:
3616 			SPDK_DEBUGLOG(nvmf, "unknown fctype 0x%02x\n",
3617 				      cap_hdr->fctype);
3618 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3619 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3620 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3621 		}
3622 	} else {
3623 		/* Controller session is established, and this is an I/O queue */
3624 		/* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
3625 		SPDK_DEBUGLOG(nvmf, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype);
3626 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3627 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3628 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3629 	}
3630 }
3631 
3632 static inline void
3633 nvmf_ctrlr_queue_pending_async_event(struct spdk_nvmf_ctrlr *ctrlr,
3634 				     union spdk_nvme_async_event_completion *event)
3635 {
3636 	struct spdk_nvmf_async_event_completion *nvmf_event;
3637 
3638 	nvmf_event = calloc(1, sizeof(*nvmf_event));
3639 	if (!nvmf_event) {
3640 		SPDK_ERRLOG("Alloc nvmf event failed, ignore the event\n");
3641 		return;
3642 	}
3643 	nvmf_event->event.raw = event->raw;
3644 	STAILQ_INSERT_TAIL(&ctrlr->async_events, nvmf_event, link);
3645 }
3646 
3647 static inline int
3648 nvmf_ctrlr_async_event_notification(struct spdk_nvmf_ctrlr *ctrlr,
3649 				    union spdk_nvme_async_event_completion *event)
3650 {
3651 	struct spdk_nvmf_request *req;
3652 	struct spdk_nvme_cpl *rsp;
3653 
3654 	assert(spdk_get_thread() == ctrlr->thread);
3655 
3656 	/* If there is no outstanding AER request, queue the event.  Then
3657 	 * if an AER is later submitted, this event can be sent as a
3658 	 * response.
3659 	 */
3660 	if (ctrlr->nr_aer_reqs == 0) {
3661 		nvmf_ctrlr_queue_pending_async_event(ctrlr, event);
3662 		return 0;
3663 	}
3664 
3665 	req = ctrlr->aer_req[--ctrlr->nr_aer_reqs];
3666 	rsp = &req->rsp->nvme_cpl;
3667 
3668 	rsp->cdw0 = event->raw;
3669 
3670 	_nvmf_request_complete(req);
3671 	ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
3672 
3673 	return 0;
3674 }
3675 
3676 int
3677 nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
3678 {
3679 	union spdk_nvme_async_event_completion event = {0};
3680 
3681 	/* Users may disable the event notification */
3682 	if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) {
3683 		return 0;
3684 	}
3685 
3686 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT)) {
3687 		return 0;
3688 	}
3689 
3690 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3691 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED;
3692 	event.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST;
3693 
3694 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3695 }
3696 
3697 int
3698 nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr)
3699 {
3700 	union spdk_nvme_async_event_completion event = {0};
3701 
3702 	/* Users may disable the event notification */
3703 	if (!ctrlr->feat.async_event_configuration.bits.ana_change_notice) {
3704 		return 0;
3705 	}
3706 
3707 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT)) {
3708 		return 0;
3709 	}
3710 
3711 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3712 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_ANA_CHANGE;
3713 	event.bits.log_page_identifier = SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS;
3714 
3715 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3716 }
3717 
3718 void
3719 nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr)
3720 {
3721 	union spdk_nvme_async_event_completion event = {0};
3722 
3723 	if (!ctrlr->num_avail_log_pages) {
3724 		return;
3725 	}
3726 
3727 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT)) {
3728 		return;
3729 	}
3730 
3731 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_IO;
3732 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL;
3733 	event.bits.log_page_identifier = SPDK_NVME_LOG_RESERVATION_NOTIFICATION;
3734 
3735 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3736 }
3737 
3738 void
3739 nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx)
3740 {
3741 	union spdk_nvme_async_event_completion event = {0};
3742 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
3743 
3744 	/* Users may disable the event notification manually or
3745 	 * it may not be enabled due to keep alive timeout
3746 	 * not being set in connect command to discovery controller.
3747 	 */
3748 	if (!ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice) {
3749 		return;
3750 	}
3751 
3752 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT)) {
3753 		return;
3754 	}
3755 
3756 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3757 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE;
3758 	event.bits.log_page_identifier = SPDK_NVME_LOG_DISCOVERY;
3759 
3760 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3761 }
3762 
3763 int
3764 nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
3765 				   union spdk_nvme_async_event_completion event)
3766 {
3767 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT)) {
3768 		return 0;
3769 	}
3770 
3771 	if (event.bits.async_event_type != SPDK_NVME_ASYNC_EVENT_TYPE_ERROR ||
3772 	    event.bits.async_event_info > SPDK_NVME_ASYNC_EVENT_FW_IMAGE_LOAD) {
3773 		return 0;
3774 	}
3775 
3776 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3777 }
3778 
3779 void
3780 nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair)
3781 {
3782 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
3783 	int i;
3784 
3785 	if (!nvmf_qpair_is_admin_queue(qpair)) {
3786 		return;
3787 	}
3788 
3789 	assert(spdk_get_thread() == ctrlr->thread);
3790 
3791 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3792 		spdk_nvmf_request_free(ctrlr->aer_req[i]);
3793 		ctrlr->aer_req[i] = NULL;
3794 	}
3795 
3796 	ctrlr->nr_aer_reqs = 0;
3797 }
3798 
3799 void
3800 nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr)
3801 {
3802 	struct spdk_nvmf_request *req;
3803 	int i;
3804 
3805 	assert(spdk_get_thread() == ctrlr->thread);
3806 
3807 	if (!ctrlr->nr_aer_reqs) {
3808 		return;
3809 	}
3810 
3811 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3812 		req = ctrlr->aer_req[i];
3813 
3814 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3815 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3816 		_nvmf_request_complete(req);
3817 
3818 		ctrlr->aer_req[i] = NULL;
3819 	}
3820 
3821 	ctrlr->nr_aer_reqs = 0;
3822 }
3823 
3824 static void
3825 _nvmf_ctrlr_add_reservation_log(void *ctx)
3826 {
3827 	struct spdk_nvmf_reservation_log *log = (struct spdk_nvmf_reservation_log *)ctx;
3828 	struct spdk_nvmf_ctrlr *ctrlr = log->ctrlr;
3829 
3830 	ctrlr->log_page_count++;
3831 
3832 	/* Maximum number of queued log pages is 255 */
3833 	if (ctrlr->num_avail_log_pages == 0xff) {
3834 		struct spdk_nvmf_reservation_log *entry;
3835 		entry = TAILQ_LAST(&ctrlr->log_head, log_page_head);
3836 		entry->log.log_page_count = ctrlr->log_page_count;
3837 		free(log);
3838 		return;
3839 	}
3840 
3841 	log->log.log_page_count = ctrlr->log_page_count;
3842 	log->log.num_avail_log_pages = ctrlr->num_avail_log_pages++;
3843 	TAILQ_INSERT_TAIL(&ctrlr->log_head, log, link);
3844 
3845 	nvmf_ctrlr_async_event_reservation_notification(ctrlr);
3846 }
3847 
3848 void
3849 nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
3850 				  struct spdk_nvmf_ns *ns,
3851 				  enum spdk_nvme_reservation_notification_log_page_type type)
3852 {
3853 	struct spdk_nvmf_reservation_log *log;
3854 
3855 	switch (type) {
3856 	case SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY:
3857 		return;
3858 	case SPDK_NVME_REGISTRATION_PREEMPTED:
3859 		if (ns->mask & SPDK_NVME_REGISTRATION_PREEMPTED_MASK) {
3860 			return;
3861 		}
3862 		break;
3863 	case SPDK_NVME_RESERVATION_RELEASED:
3864 		if (ns->mask & SPDK_NVME_RESERVATION_RELEASED_MASK) {
3865 			return;
3866 		}
3867 		break;
3868 	case SPDK_NVME_RESERVATION_PREEMPTED:
3869 		if (ns->mask & SPDK_NVME_RESERVATION_PREEMPTED_MASK) {
3870 			return;
3871 		}
3872 		break;
3873 	default:
3874 		return;
3875 	}
3876 
3877 	log = calloc(1, sizeof(*log));
3878 	if (!log) {
3879 		SPDK_ERRLOG("Alloc log page failed, ignore the log\n");
3880 		return;
3881 	}
3882 	log->ctrlr = ctrlr;
3883 	log->log.type = type;
3884 	log->log.nsid = ns->nsid;
3885 
3886 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_reservation_log, log);
3887 }
3888 
3889 /* Check from subsystem poll group's namespace information data structure */
3890 static bool
3891 nvmf_ns_info_ctrlr_is_registrant(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3892 				 struct spdk_nvmf_ctrlr *ctrlr)
3893 {
3894 	uint32_t i;
3895 
3896 	for (i = 0; i < SPDK_NVMF_MAX_NUM_REGISTRANTS; i++) {
3897 		if (!spdk_uuid_compare(&ns_info->reg_hostid[i], &ctrlr->hostid)) {
3898 			return true;
3899 		}
3900 	}
3901 
3902 	return false;
3903 }
3904 
3905 /*
3906  * Check the NVMe command is permitted or not for current controller(Host).
3907  */
3908 static int
3909 nvmf_ns_reservation_request_check(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3910 				  struct spdk_nvmf_ctrlr *ctrlr,
3911 				  struct spdk_nvmf_request *req)
3912 {
3913 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3914 	enum spdk_nvme_reservation_type rtype = ns_info->rtype;
3915 	uint8_t status = SPDK_NVME_SC_SUCCESS;
3916 	uint8_t racqa;
3917 	bool is_registrant;
3918 
3919 	/* No valid reservation */
3920 	if (!rtype) {
3921 		return 0;
3922 	}
3923 
3924 	is_registrant = nvmf_ns_info_ctrlr_is_registrant(ns_info, ctrlr);
3925 	/* All registrants type and current ctrlr is a valid registrant */
3926 	if ((rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
3927 	     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && is_registrant) {
3928 		return 0;
3929 	} else if (!spdk_uuid_compare(&ns_info->holder_id, &ctrlr->hostid)) {
3930 		return 0;
3931 	}
3932 
3933 	/* Non-holder for current controller */
3934 	switch (cmd->opc) {
3935 	case SPDK_NVME_OPC_READ:
3936 	case SPDK_NVME_OPC_COMPARE:
3937 		if (rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3938 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3939 			goto exit;
3940 		}
3941 		if ((rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY ||
3942 		     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && !is_registrant) {
3943 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3944 		}
3945 		break;
3946 	case SPDK_NVME_OPC_FLUSH:
3947 	case SPDK_NVME_OPC_WRITE:
3948 	case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
3949 	case SPDK_NVME_OPC_WRITE_ZEROES:
3950 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
3951 		if (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE ||
3952 		    rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3953 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3954 			goto exit;
3955 		}
3956 		if (!is_registrant) {
3957 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3958 		}
3959 		break;
3960 	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
3961 		racqa = cmd->cdw10_bits.resv_acquire.racqa;
3962 		if (racqa == SPDK_NVME_RESERVE_ACQUIRE) {
3963 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3964 			goto exit;
3965 		}
3966 		if (!is_registrant) {
3967 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3968 		}
3969 		break;
3970 	case SPDK_NVME_OPC_RESERVATION_RELEASE:
3971 		if (!is_registrant) {
3972 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3973 		}
3974 		break;
3975 	default:
3976 		break;
3977 	}
3978 
3979 exit:
3980 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3981 	req->rsp->nvme_cpl.status.sc = status;
3982 	if (status == SPDK_NVME_SC_RESERVATION_CONFLICT) {
3983 		return -EPERM;
3984 	}
3985 
3986 	return 0;
3987 }
3988 
3989 static int
3990 nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
3991 				struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
3992 {
3993 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3994 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3995 	struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req;
3996 	int rc;
3997 
3998 	if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) {
3999 		/* first fused operation (should be compare) */
4000 		if (first_fused_req != NULL) {
4001 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4002 
4003 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
4004 
4005 			/* abort req->qpair->first_fused_request and continue with new fused command */
4006 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4007 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4008 			_nvmf_request_complete(first_fused_req);
4009 		} else if (cmd->opc != SPDK_NVME_OPC_COMPARE) {
4010 			SPDK_ERRLOG("Wrong op code of fused operations\n");
4011 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4012 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
4013 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4014 		}
4015 
4016 		req->qpair->first_fused_req = req;
4017 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
4018 	} else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) {
4019 		/* second fused operation (should be write) */
4020 		if (first_fused_req == NULL) {
4021 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
4022 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4023 			rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4024 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4025 		} else if (cmd->opc != SPDK_NVME_OPC_WRITE) {
4026 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4027 
4028 			SPDK_ERRLOG("Wrong op code of fused operations\n");
4029 
4030 			/* abort req->qpair->first_fused_request and fail current command */
4031 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4032 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4033 			_nvmf_request_complete(first_fused_req);
4034 
4035 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4036 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
4037 			req->qpair->first_fused_req = NULL;
4038 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4039 		}
4040 
4041 		/* save request of first command to generate response later */
4042 		req->first_fused_req = first_fused_req;
4043 		req->qpair->first_fused_req = NULL;
4044 	} else {
4045 		SPDK_ERRLOG("Invalid fused command fuse field.\n");
4046 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4047 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
4048 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4049 	}
4050 
4051 	rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req);
4052 
4053 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4054 		if (spdk_nvme_cpl_is_error(rsp)) {
4055 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4056 
4057 			fused_response->status = rsp->status;
4058 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4059 			rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
4060 			/* Complete first of fused commands. Second will be completed by upper layer */
4061 			_nvmf_request_complete(first_fused_req);
4062 			req->first_fused_req = NULL;
4063 		}
4064 	}
4065 
4066 	return rc;
4067 }
4068 
4069 bool
4070 nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req)
4071 {
4072 	struct spdk_nvmf_transport *transport = req->qpair->transport;
4073 	struct spdk_nvmf_ns *ns;
4074 
4075 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE);
4076 
4077 	if (!transport->opts.zcopy) {
4078 		return false;
4079 	}
4080 
4081 	if (nvmf_qpair_is_admin_queue(req->qpair)) {
4082 		/* Admin queue */
4083 		return false;
4084 	}
4085 
4086 	if ((req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_WRITE) &&
4087 	    (req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_READ)) {
4088 		/* Not a READ or WRITE command */
4089 		return false;
4090 	}
4091 
4092 	if (req->cmd->nvme_cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
4093 		/* Fused commands dont use zcopy buffers */
4094 		return false;
4095 	}
4096 
4097 	ns = _nvmf_subsystem_get_ns(req->qpair->ctrlr->subsys, req->cmd->nvme_cmd.nsid);
4098 	if (ns == NULL || ns->bdev == NULL || !ns->zcopy) {
4099 		return false;
4100 	}
4101 
4102 	req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT;
4103 	return true;
4104 }
4105 
4106 void
4107 spdk_nvmf_request_zcopy_start(struct spdk_nvmf_request *req)
4108 {
4109 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
4110 
4111 	/* Set iovcnt to be the maximum number of iovs that the ZCOPY can use */
4112 	req->iovcnt = NVMF_REQ_MAX_BUFFERS;
4113 
4114 	spdk_nvmf_request_exec(req);
4115 }
4116 
4117 void
4118 spdk_nvmf_request_zcopy_end(struct spdk_nvmf_request *req, bool commit)
4119 {
4120 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE);
4121 	req->zcopy_phase = NVMF_ZCOPY_PHASE_END_PENDING;
4122 
4123 	nvmf_bdev_ctrlr_zcopy_end(req, commit);
4124 }
4125 
4126 int
4127 nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
4128 {
4129 	uint32_t nsid;
4130 	struct spdk_nvmf_ns *ns;
4131 	struct spdk_bdev *bdev;
4132 	struct spdk_bdev_desc *desc;
4133 	struct spdk_io_channel *ch;
4134 	struct spdk_nvmf_poll_group *group = req->qpair->group;
4135 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
4136 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
4137 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
4138 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4139 	enum spdk_nvme_ana_state ana_state;
4140 
4141 	/* pre-set response details for this command */
4142 	response->status.sc = SPDK_NVME_SC_SUCCESS;
4143 	nsid = cmd->nsid;
4144 
4145 	if (spdk_unlikely(ctrlr == NULL)) {
4146 		SPDK_ERRLOG("I/O command sent before CONNECT\n");
4147 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4148 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4149 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4150 	}
4151 
4152 	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
4153 		SPDK_ERRLOG("I/O command sent to disabled controller\n");
4154 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4155 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4156 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4157 	}
4158 
4159 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
4160 	if (ns == NULL || ns->bdev == NULL) {
4161 		SPDK_DEBUGLOG(nvmf, "Unsuccessful query for nsid %u\n", cmd->nsid);
4162 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4163 		response->status.dnr = 1;
4164 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4165 	}
4166 
4167 	ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
4168 	if (spdk_unlikely(ana_state != SPDK_NVME_ANA_OPTIMIZED_STATE &&
4169 			  ana_state != SPDK_NVME_ANA_NON_OPTIMIZED_STATE)) {
4170 		SPDK_DEBUGLOG(nvmf, "Fail I/O command due to ANA state %d\n",
4171 			      ana_state);
4172 		response->status.sct = SPDK_NVME_SCT_PATH;
4173 		response->status.sc = _nvme_ana_state_to_path_status(ana_state);
4174 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4175 	}
4176 
4177 	if (spdk_likely(ctrlr->listener != NULL)) {
4178 		SPDK_DTRACE_PROBE3(nvmf_request_io_exec_path, req,
4179 				   ctrlr->listener->trid->traddr,
4180 				   ctrlr->listener->trid->trsvcid);
4181 	}
4182 
4183 	/* scan-build falsely reporting dereference of null pointer */
4184 	assert(group != NULL && group->sgroups != NULL);
4185 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
4186 	if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
4187 		SPDK_DEBUGLOG(nvmf, "Reservation Conflict for nsid %u, opcode %u\n",
4188 			      cmd->nsid, cmd->opc);
4189 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4190 	}
4191 
4192 	bdev = ns->bdev;
4193 	desc = ns->desc;
4194 	ch = ns_info->channel;
4195 
4196 	if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) {
4197 		return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch);
4198 	} else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) {
4199 		struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl;
4200 
4201 		SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n");
4202 
4203 		/* abort req->qpair->first_fused_request and continue with new command */
4204 		fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4205 		fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4206 		_nvmf_request_complete(req->qpair->first_fused_req);
4207 		req->qpair->first_fused_req = NULL;
4208 	}
4209 
4210 	if (spdk_nvmf_request_using_zcopy(req)) {
4211 		assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
4212 		return nvmf_bdev_ctrlr_zcopy_start(bdev, desc, ch, req);
4213 	} else {
4214 		switch (cmd->opc) {
4215 		case SPDK_NVME_OPC_READ:
4216 			return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
4217 		case SPDK_NVME_OPC_WRITE:
4218 			return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
4219 		case SPDK_NVME_OPC_COMPARE:
4220 			return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req);
4221 		case SPDK_NVME_OPC_WRITE_ZEROES:
4222 			return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
4223 		case SPDK_NVME_OPC_FLUSH:
4224 			return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
4225 		case SPDK_NVME_OPC_DATASET_MANAGEMENT:
4226 			return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
4227 		case SPDK_NVME_OPC_RESERVATION_REGISTER:
4228 		case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
4229 		case SPDK_NVME_OPC_RESERVATION_RELEASE:
4230 		case SPDK_NVME_OPC_RESERVATION_REPORT:
4231 			spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req);
4232 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
4233 		case SPDK_NVME_OPC_COPY:
4234 			return nvmf_bdev_ctrlr_copy_cmd(bdev, desc, ch, req);
4235 		default:
4236 			return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
4237 		}
4238 	}
4239 }
4240 
4241 static void
4242 nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair)
4243 {
4244 	if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) {
4245 		assert(qpair->state_cb != NULL);
4246 
4247 		if (TAILQ_EMPTY(&qpair->outstanding)) {
4248 			qpair->state_cb(qpair->state_cb_arg, 0);
4249 		}
4250 	}
4251 }
4252 
4253 int
4254 spdk_nvmf_request_free(struct spdk_nvmf_request *req)
4255 {
4256 	struct spdk_nvmf_qpair *qpair = req->qpair;
4257 
4258 	TAILQ_REMOVE(&qpair->outstanding, req, link);
4259 	if (nvmf_transport_req_free(req)) {
4260 		SPDK_ERRLOG("Unable to free transport level request resources.\n");
4261 	}
4262 
4263 	nvmf_qpair_request_cleanup(qpair);
4264 
4265 	return 0;
4266 }
4267 
/*
 * Complete a request and update all bookkeeping tied to it.
 *
 * ctx is the struct spdk_nvmf_request being completed (void * so the
 * function can be used as a thread message callback).
 *
 * Steps, in order:
 *   1. Fill in the common completion fields (sqid, phase bit, cid).
 *   2. Locate the subsystem poll group, either through the qpair's
 *      controller or, for a Connect command without a controller,
 *      through the Connect command itself.
 *   3. Advance the request's zcopy phase and remove the request from the
 *      qpair's outstanding list where the phase calls for it.
 *   4. Hand the request to the transport for completion.
 *   5. Decrement the matching outstanding counter (management or
 *      per-namespace I/O); AER requests are excluded.
 *   6. If the subsystem poll group is pausing and nothing is outstanding
 *      any more, mark it paused and invoke its callback.
 *   7. Run the qpair cleanup check.
 */
static void
_nvmf_request_complete(void *ctx)
{
	struct spdk_nvmf_request *req = ctx;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	struct spdk_nvmf_qpair *qpair;
	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
	bool is_aer = false;
	uint32_t nsid;
	bool paused;
	uint8_t opcode;

	rsp->sqid = 0;
	rsp->status.p = 0;
	rsp->cid = req->cmd->nvme_cmd.cid;
	/*
	 * nsid and opcode are copied into locals here and only the locals are
	 * used after the transport completion call below.
	 * NOTE(review): presumably because the command buffer should not be
	 * relied upon once the transport has completed the request - confirm.
	 */
	nsid = req->cmd->nvme_cmd.nsid;
	opcode = req->cmd->nvmf_cmd.opcode;

	qpair = req->qpair;
	if (qpair->ctrlr) {
		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
		assert(sgroup != NULL);
		is_aer = req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
		/* Completions on non-admin queues (qid != 0) are counted in the poll group statistics. */
		if (spdk_likely(qpair->qid != 0)) {
			qpair->group->stat.completed_nvme_io++;
		}

		/*
		 * Set the crd value.
		 * If the IO has any error, and dnr (DoNotRetry) is not 1,
		 * and ACRE is enabled, we will set the crd to 1 to select the first CRDT.
		 */
		if (spdk_nvme_cpl_is_error(rsp) &&
		    rsp->status.dnr == 0 &&
		    qpair->ctrlr->acre_enabled) {
			rsp->status.crd = 1;
		}
	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
		/* No controller yet: a Connect command identifies its subsystem poll group itself. */
		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
	}

	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
		spdk_nvme_print_completion(qpair->qid, rsp);
	}

	/*
	 * Advance the zcopy phase.  The request leaves the outstanding list
	 * when it is a non-zcopy request, when zcopy init failed, or when the
	 * zcopy end has finished.  A successful init (-> EXECUTE) and a
	 * completion seen while in EXECUTE keep it on the list.
	 */
	switch (req->zcopy_phase) {
	case NVMF_ZCOPY_PHASE_NONE:
		TAILQ_REMOVE(&qpair->outstanding, req, link);
		break;
	case NVMF_ZCOPY_PHASE_INIT:
		if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
			req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT_FAILED;
			TAILQ_REMOVE(&qpair->outstanding, req, link);
		} else {
			req->zcopy_phase = NVMF_ZCOPY_PHASE_EXECUTE;
		}
		break;
	case NVMF_ZCOPY_PHASE_EXECUTE:
		break;
	case NVMF_ZCOPY_PHASE_END_PENDING:
		TAILQ_REMOVE(&qpair->outstanding, req, link);
		req->zcopy_phase = NVMF_ZCOPY_PHASE_COMPLETE;
		break;
	default:
		SPDK_ERRLOG("Invalid ZCOPY phase %u\n", req->zcopy_phase);
		break;
	}

	/* A transport failure is only logged; the bookkeeping below still runs. */
	if (nvmf_transport_req_complete(req)) {
		SPDK_ERRLOG("Transport request completion error!\n");
	}

	/* AER cmd is an exception */
	if (sgroup && !is_aer) {
		if (spdk_unlikely(opcode == SPDK_NVME_OPC_FABRIC ||
				  nvmf_qpair_is_admin_queue(qpair))) {
			/* Fabrics commands and admin queue commands count as management I/O. */
			assert(sgroup->mgmt_io_outstanding > 0);
			sgroup->mgmt_io_outstanding--;
		} else {
			/* Namespace I/O is only released once the request has fully ended. */
			if (req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE ||
			    req->zcopy_phase == NVMF_ZCOPY_PHASE_COMPLETE ||
			    req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT_FAILED) {
				/* End of request */

				/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
				if (spdk_likely(nsid - 1 < sgroup->num_ns)) {
					sgroup->ns_info[nsid - 1].io_outstanding--;
				}
			}
		}

		/*
		 * A pausing subsystem poll group becomes paused once no management
		 * I/O is outstanding and no pausing namespace has I/O outstanding.
		 */
		if (spdk_unlikely(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
				  sgroup->mgmt_io_outstanding == 0)) {
			paused = true;
			/* nsid is reused here as a zero-based index into ns_info. */
			for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
				ns_info = &sgroup->ns_info[nsid];

				if (ns_info->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
				    ns_info->io_outstanding > 0) {
					paused = false;
					break;
				}
			}

			if (paused) {
				sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
				/* Notify whoever requested the pause, then clear the callback. */
				sgroup->cb_fn(sgroup->cb_arg, 0);
				sgroup->cb_fn = NULL;
				sgroup->cb_arg = NULL;
			}
		}

	}

	nvmf_qpair_request_cleanup(qpair);
}
4385 
4386 int
4387 spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
4388 {
4389 	struct spdk_nvmf_qpair *qpair = req->qpair;
4390 
4391 	spdk_thread_exec_msg(qpair->group->thread, _nvmf_request_complete, req);
4392 
4393 	return 0;
4394 }
4395 
4396 void
4397 spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
4398 {
4399 	struct spdk_nvmf_qpair *qpair = req->qpair;
4400 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4401 	enum spdk_nvmf_request_exec_status status;
4402 
4403 	if (qpair->ctrlr) {
4404 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4405 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4406 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4407 	}
4408 
4409 	assert(sgroup != NULL);
4410 	sgroup->mgmt_io_outstanding++;
4411 
4412 	/* Place the request on the outstanding list so we can keep track of it */
4413 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4414 
4415 	assert(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC);
4416 	status = nvmf_ctrlr_process_fabrics_cmd(req);
4417 
4418 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4419 		_nvmf_request_complete(req);
4420 	}
4421 }
4422 
4423 static bool
4424 nvmf_check_subsystem_active(struct spdk_nvmf_request *req)
4425 {
4426 	struct spdk_nvmf_qpair *qpair = req->qpair;
4427 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4428 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4429 	uint32_t nsid;
4430 
4431 	if (qpair->ctrlr) {
4432 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4433 		assert(sgroup != NULL);
4434 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4435 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4436 	}
4437 
4438 	/* Check if the subsystem is paused (if there is a subsystem) */
4439 	if (sgroup != NULL) {
4440 		if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
4441 				  nvmf_qpair_is_admin_queue(qpair))) {
4442 			if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4443 				/* The subsystem is not currently active. Queue this request. */
4444 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4445 				return false;
4446 			}
4447 			sgroup->mgmt_io_outstanding++;
4448 		} else {
4449 			nsid = req->cmd->nvme_cmd.nsid;
4450 
4451 			/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
4452 			if (spdk_unlikely(nsid - 1 >= sgroup->num_ns)) {
4453 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4454 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4455 				req->rsp->nvme_cpl.status.dnr = 1;
4456 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4457 				_nvmf_request_complete(req);
4458 				return false;
4459 			}
4460 
4461 			ns_info = &sgroup->ns_info[nsid - 1];
4462 			if (ns_info->channel == NULL) {
4463 				/* This can can happen if host sends I/O to a namespace that is
4464 				 * in the process of being added, but before the full addition
4465 				 * process is complete.  Report invalid namespace in that case.
4466 				 */
4467 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4468 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4469 				req->rsp->nvme_cpl.status.dnr = 1;
4470 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4471 				ns_info->io_outstanding++;
4472 				_nvmf_request_complete(req);
4473 				return false;
4474 			}
4475 
4476 			if (ns_info->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4477 				/* The namespace is not currently active. Queue this request. */
4478 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4479 				return false;
4480 			}
4481 
4482 			ns_info->io_outstanding++;
4483 		}
4484 
4485 		if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
4486 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4487 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4488 			TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4489 			_nvmf_request_complete(req);
4490 			return false;
4491 		}
4492 	}
4493 
4494 	return true;
4495 }
4496 
4497 void
4498 spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
4499 {
4500 	struct spdk_nvmf_qpair *qpair = req->qpair;
4501 	struct spdk_nvmf_transport *transport = qpair->transport;
4502 	enum spdk_nvmf_request_exec_status status;
4503 
4504 	if (req->data != NULL) {
4505 		assert(req->iovcnt > 0);
4506 	}
4507 
4508 	if (!nvmf_check_subsystem_active(req)) {
4509 		return;
4510 	}
4511 
4512 	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
4513 		spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
4514 	}
4515 
4516 	/* Place the request on the outstanding list so we can keep track of it */
4517 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4518 
4519 	if (spdk_unlikely((req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC) &&
4520 			  spdk_nvme_trtype_is_fabrics(transport->ops->type))) {
4521 		status = nvmf_ctrlr_process_fabrics_cmd(req);
4522 	} else if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4523 		status = nvmf_ctrlr_process_admin_cmd(req);
4524 	} else {
4525 		status = nvmf_ctrlr_process_io_cmd(req);
4526 	}
4527 
4528 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4529 		_nvmf_request_complete(req);
4530 	}
4531 }
4532 
4533 static bool
4534 nvmf_ctrlr_get_dif_ctx(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
4535 		       struct spdk_dif_ctx *dif_ctx)
4536 {
4537 	struct spdk_nvmf_ns *ns;
4538 	struct spdk_bdev *bdev;
4539 
4540 	if (ctrlr == NULL || cmd == NULL) {
4541 		return false;
4542 	}
4543 
4544 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
4545 	if (ns == NULL || ns->bdev == NULL) {
4546 		return false;
4547 	}
4548 
4549 	bdev = ns->bdev;
4550 
4551 	switch (cmd->opc) {
4552 	case SPDK_NVME_OPC_READ:
4553 	case SPDK_NVME_OPC_WRITE:
4554 	case SPDK_NVME_OPC_COMPARE:
4555 		return nvmf_bdev_ctrlr_get_dif_ctx(bdev, cmd, dif_ctx);
4556 	default:
4557 		break;
4558 	}
4559 
4560 	return false;
4561 }
4562 
4563 bool
4564 spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx)
4565 {
4566 	struct spdk_nvmf_qpair *qpair = req->qpair;
4567 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
4568 
4569 	if (spdk_likely(ctrlr == NULL || !ctrlr->dif_insert_or_strip)) {
4570 		return false;
4571 	}
4572 
4573 	if (spdk_unlikely(qpair->state != SPDK_NVMF_QPAIR_ACTIVE)) {
4574 		return false;
4575 	}
4576 
4577 	if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
4578 		return false;
4579 	}
4580 
4581 	if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4582 		return false;
4583 	}
4584 
4585 	return nvmf_ctrlr_get_dif_ctx(ctrlr, &req->cmd->nvme_cmd, dif_ctx);
4586 }
4587 
4588 void
4589 spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr)
4590 {
4591 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = hdlr;
4592 }
4593 
4594 static int
4595 nvmf_passthru_admin_cmd(struct spdk_nvmf_request *req)
4596 {
4597 	struct spdk_bdev *bdev;
4598 	struct spdk_bdev_desc *desc;
4599 	struct spdk_io_channel *ch;
4600 	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
4601 	struct spdk_nvme_cpl *response = spdk_nvmf_request_get_response(req);
4602 	uint32_t bdev_nsid;
4603 	int rc;
4604 
4605 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid == 0) {
4606 		bdev_nsid = cmd->nsid;
4607 	} else {
4608 		bdev_nsid = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid;
4609 	}
4610 
4611 	rc = spdk_nvmf_request_get_bdev(bdev_nsid, req, &bdev, &desc, &ch);
4612 	if (rc) {
4613 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4614 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4615 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4616 	}
4617 	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, NULL);
4618 }
4619 
4620 void
4621 spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid)
4622 {
4623 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = nvmf_passthru_admin_cmd;
4624 	g_nvmf_custom_admin_cmd_hdlrs[opc].nsid = forward_nsid;
4625 }
4626 
4627 int
4628 spdk_nvmf_request_get_bdev(uint32_t nsid, struct spdk_nvmf_request *req,
4629 			   struct spdk_bdev **bdev, struct spdk_bdev_desc **desc, struct spdk_io_channel **ch)
4630 {
4631 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
4632 	struct spdk_nvmf_ns *ns;
4633 	struct spdk_nvmf_poll_group *group = req->qpair->group;
4634 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4635 
4636 	*bdev = NULL;
4637 	*desc = NULL;
4638 	*ch = NULL;
4639 
4640 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
4641 	if (ns == NULL || ns->bdev == NULL) {
4642 		return -EINVAL;
4643 	}
4644 
4645 	assert(group != NULL && group->sgroups != NULL);
4646 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
4647 	*bdev = ns->bdev;
4648 	*desc = ns->desc;
4649 	*ch = ns_info->channel;
4650 
4651 	return 0;
4652 }
4653 
4654 struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req)
4655 {
4656 	return req->qpair->ctrlr;
4657 }
4658 
4659 struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req)
4660 {
4661 	return &req->cmd->nvme_cmd;
4662 }
4663 
4664 struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req)
4665 {
4666 	return &req->rsp->nvme_cpl;
4667 }
4668 
4669 struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req)
4670 {
4671 	return req->qpair->ctrlr->subsys;
4672 }
4673 
4674 SPDK_LOG_DEPRECATION_REGISTER(nvmf_request_get_data, "spdk_nvmf_request_get_data",
4675 			      "SPDK 23.09", 60);
4676 
4677 void
4678 spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length)
4679 {
4680 	SPDK_LOG_DEPRECATED(nvmf_request_get_data);
4681 	*data = req->data;
4682 	*length = req->length;
4683 }
4684 
4685 size_t
4686 spdk_nvmf_request_copy_from_buf(struct spdk_nvmf_request *req,
4687 				void *buf, size_t buflen)
4688 {
4689 	struct spdk_iov_xfer ix;
4690 
4691 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
4692 	return spdk_iov_xfer_from_buf(&ix, buf, buflen);
4693 }
4694 
4695 size_t
4696 spdk_nvmf_request_copy_to_buf(struct spdk_nvmf_request *req,
4697 			      void *buf, size_t buflen)
4698 {
4699 	struct spdk_iov_xfer ix;
4700 
4701 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
4702 	return spdk_iov_xfer_to_buf(&ix, buf, buflen);
4703 }
4704 
4705 struct spdk_nvmf_subsystem *spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr)
4706 {
4707 	return ctrlr->subsys;
4708 }
4709 
4710 uint16_t
4711 spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr)
4712 {
4713 	return ctrlr->cntlid;
4714 }
4715 
4716 struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req)
4717 {
4718 	return req->req_to_abort;
4719 }
4720