xref: /spdk/lib/nvmf/ctrlr.c (revision a0d24145bf3d795cf89adc414320b138fae480ab)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvmf_internal.h"
10 #include "transport.h"
11 
12 #include "spdk/bdev.h"
13 #include "spdk/bdev_zone.h"
14 #include "spdk/bit_array.h"
15 #include "spdk/endian.h"
16 #include "spdk/thread.h"
17 #include "spdk/nvme_spec.h"
18 #include "spdk/nvmf_cmd.h"
19 #include "spdk/string.h"
20 #include "spdk/util.h"
21 #include "spdk/version.h"
22 #include "spdk/log.h"
23 #include "spdk_internal/usdt.h"
24 
/* Lower bound, in milliseconds, from which the default KAS value below is derived. */
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000
/* Keep alive timeout, in milliseconds, given to a discovery controller whose KATO is 0. */
#define NVMF_DISC_KATO_IN_MS 120000
/* KAS is expressed in units of 100 ms. */
#define KAS_TIME_UNIT_IN_MS 100
/* Default KAS: the minimum keep alive timeout converted to KAS units. */
#define KAS_DEFAULT_VALUE (MIN_KEEP_ALIVE_TIMEOUT_IN_MS / KAS_TIME_UNIT_IN_MS)

/* NOTE(review): presumably the time allowed for a CC reset/shutdown to finish - confirm at its users. */
#define NVMF_CC_RESET_SHN_TIMEOUT_IN_MS	10000

/* Basis for the CAP.TO value reported at controller creation: the CC timeout plus 5000 ms. */
#define NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS	(NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + 5000)

/*
 * Report the SPDK version as the firmware revision.
 * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
 */
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING

/* NOTE(review): presumably the ANA transition time, in seconds - not used in this part of the file, confirm. */
#define ANA_TRANSITION_TIME_IN_SEC 10

/* NOTE(review): presumably the Abort command limit - not used in this part of the file, confirm. */
#define NVMF_ABORT_COMMAND_LIMIT 3
/*
 * Support for custom admin command handlers
 *
 * One entry describes the custom handling registered for a single admin opcode.
 */
struct spdk_nvmf_custom_admin_cmd {
	spdk_nvmf_custom_cmd_hdlr hdlr; /* custom handler callback */
	uint32_t nsid; /* nsid to forward */
};
51 
/* Table with SPDK_NVME_MAX_OPC + 1 entries; presumably indexed by admin opcode - confirm at its users. */
static struct spdk_nvmf_custom_admin_cmd g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_MAX_OPC + 1];

/* Forward declaration: the connect paths below call this before its definition appears. */
static void _nvmf_request_complete(void *ctx);
55 
56 static inline void
57 nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
58 			      uint8_t iattr, uint16_t ipo)
59 {
60 	rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
61 	rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
62 	rsp->status_code_specific.invalid.iattr = iattr;
63 	rsp->status_code_specific.invalid.ipo = ipo;
64 }
65 
/*
 * Flag `field` of the Connect command as invalid: iattr is 0 and ipo is the byte offset of
 * `field` inside struct spdk_nvmf_fabric_connect_cmd.
 */
#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field)	\
	nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field))
/*
 * Flag `field` of the Connect data as invalid: iattr is 1 and ipo is the byte offset of
 * `field` inside struct spdk_nvmf_fabric_connect_data.
 */
#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field)	\
	nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))
70 
71 
72 static void
73 nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
74 {
75 	if (!ctrlr) {
76 		SPDK_ERRLOG("Controller is NULL\n");
77 		return;
78 	}
79 
80 	if (ctrlr->keep_alive_poller == NULL) {
81 		return;
82 	}
83 
84 	SPDK_DEBUGLOG(nvmf, "Stop keep alive poller\n");
85 	spdk_poller_unregister(&ctrlr->keep_alive_poller);
86 }
87 
88 static void
89 nvmf_ctrlr_stop_association_timer(struct spdk_nvmf_ctrlr *ctrlr)
90 {
91 	if (!ctrlr) {
92 		SPDK_ERRLOG("Controller is NULL\n");
93 		assert(false);
94 		return;
95 	}
96 
97 	if (ctrlr->association_timer == NULL) {
98 		return;
99 	}
100 
101 	SPDK_DEBUGLOG(nvmf, "Stop association timer\n");
102 	spdk_poller_unregister(&ctrlr->association_timer);
103 }
104 
105 static void
106 nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
107 {
108 	if (status == 0) {
109 		SPDK_DEBUGLOG(nvmf, "ctrlr disconnect qpairs complete successfully\n");
110 	} else {
111 		SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
112 	}
113 }
114 
115 static int
116 _nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i, bool include_admin)
117 {
118 	int rc = 0;
119 	struct spdk_nvmf_ctrlr *ctrlr;
120 	struct spdk_nvmf_qpair *qpair, *temp_qpair;
121 	struct spdk_io_channel *ch;
122 	struct spdk_nvmf_poll_group *group;
123 
124 	ctrlr = spdk_io_channel_iter_get_ctx(i);
125 	ch = spdk_io_channel_iter_get_channel(i);
126 	group = spdk_io_channel_get_ctx(ch);
127 
128 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
129 		if (qpair->ctrlr == ctrlr && (include_admin || !nvmf_qpair_is_admin_queue(qpair))) {
130 			rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
131 			if (rc) {
132 				SPDK_ERRLOG("Qpair disconnect failed\n");
133 				return rc;
134 			}
135 		}
136 	}
137 
138 	return rc;
139 }
140 
/* Per-channel step: disconnect every qpair of the controller, admin queue included. */
static void
nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, true);

	spdk_for_each_channel_continue(i, rc);
}
146 
/* Per-channel step: disconnect only the I/O qpairs of the controller. */
static void
nvmf_ctrlr_disconnect_io_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, false);

	spdk_for_each_channel_continue(i, rc);
}
152 
153 static int
154 nvmf_ctrlr_keep_alive_poll(void *ctx)
155 {
156 	uint64_t keep_alive_timeout_tick;
157 	uint64_t now = spdk_get_ticks();
158 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
159 
160 	if (ctrlr->in_destruct) {
161 		nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
162 		return SPDK_POLLER_IDLE;
163 	}
164 
165 	SPDK_DEBUGLOG(nvmf, "Polling ctrlr keep alive timeout\n");
166 
167 	/* If the Keep alive feature is in use and the timer expires */
168 	keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
169 				  ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
170 	if (now > keep_alive_timeout_tick) {
171 		SPDK_NOTICELOG("Disconnecting host %s from subsystem %s due to keep alive timeout.\n",
172 			       ctrlr->hostnqn, ctrlr->subsys->subnqn);
173 		/* set the Controller Fatal Status bit to '1' */
174 		if (ctrlr->vcprop.csts.bits.cfs == 0) {
175 			nvmf_ctrlr_set_fatal_status(ctrlr);
176 
177 			/*
178 			 * disconnect qpairs, terminate Transport connection
179 			 * destroy ctrlr, break the host to controller association
180 			 * disconnect qpairs with qpair->ctrlr == ctrlr
181 			 */
182 			spdk_for_each_channel(ctrlr->subsys->tgt,
183 					      nvmf_ctrlr_disconnect_qpairs_on_pg,
184 					      ctrlr,
185 					      nvmf_ctrlr_disconnect_qpairs_done);
186 			return SPDK_POLLER_BUSY;
187 		}
188 	}
189 
190 	return SPDK_POLLER_IDLE;
191 }
192 
193 static void
194 nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
195 {
196 	if (!ctrlr) {
197 		SPDK_ERRLOG("Controller is NULL\n");
198 		return;
199 	}
200 
201 	/* if cleared to 0 then the Keep Alive Timer is disabled */
202 	if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
203 
204 		ctrlr->last_keep_alive_tick = spdk_get_ticks();
205 
206 		SPDK_DEBUGLOG(nvmf, "Ctrlr add keep alive poller\n");
207 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
208 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
209 	}
210 }
211 
212 static void
213 ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
214 			       struct spdk_nvmf_ctrlr *ctrlr,
215 			       struct spdk_nvmf_fabric_connect_rsp *rsp)
216 {
217 	assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());
218 
219 	/* check if we would exceed ctrlr connection limit */
220 	if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) {
221 		SPDK_ERRLOG("Requested QID %u but Max QID is %u\n",
222 			    qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1);
223 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
224 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
225 		return;
226 	}
227 
228 	if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
229 		SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
230 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
231 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
232 		return;
233 	}
234 
235 	qpair->ctrlr = ctrlr;
236 	spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid);
237 
238 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
239 	rsp->status_code_specific.success.cntlid = ctrlr->cntlid;
240 	SPDK_DEBUGLOG(nvmf, "connect capsule response: cntlid = 0x%04x\n",
241 		      rsp->status_code_specific.success.cntlid);
242 
243 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_qpair, qpair, qpair->qid, ctrlr->subsys->subnqn,
244 			   ctrlr->hostnqn);
245 }
246 
247 static void
248 _nvmf_ctrlr_add_admin_qpair(void *ctx)
249 {
250 	struct spdk_nvmf_request *req = ctx;
251 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
252 	struct spdk_nvmf_qpair *qpair = req->qpair;
253 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
254 
255 	ctrlr->admin_qpair = qpair;
256 	ctrlr->association_timeout = qpair->transport->opts.association_timeout;
257 	nvmf_ctrlr_start_keep_alive_timer(ctrlr);
258 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
259 	_nvmf_request_complete(req);
260 }
261 
262 static void
263 _nvmf_subsystem_add_ctrlr(void *ctx)
264 {
265 	struct spdk_nvmf_request *req = ctx;
266 	struct spdk_nvmf_qpair *qpair = req->qpair;
267 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
268 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
269 
270 	if (nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) {
271 		SPDK_ERRLOG("Unable to add controller to subsystem\n");
272 		spdk_bit_array_free(&ctrlr->qpair_mask);
273 		free(ctrlr);
274 		qpair->ctrlr = NULL;
275 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
276 		spdk_nvmf_request_complete(req);
277 		return;
278 	}
279 
280 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_admin_qpair, req);
281 }
282 
283 static void
284 nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
285 		      struct spdk_nvmf_ctrlr_data *cdata)
286 {
287 	cdata->aerl = SPDK_NVMF_MAX_ASYNC_EVENTS - 1;
288 	cdata->kas = KAS_DEFAULT_VALUE;
289 	cdata->vid = SPDK_PCI_VID_INTEL;
290 	cdata->ssvid = SPDK_PCI_VID_INTEL;
291 	/* INTEL OUI */
292 	cdata->ieee[0] = 0xe4;
293 	cdata->ieee[1] = 0xd2;
294 	cdata->ieee[2] = 0x5c;
295 	cdata->oncs.compare = 1;
296 	cdata->oncs.reservations = 1;
297 	cdata->fuses.compare_and_write = 1;
298 	cdata->oncs.copy = 1;
299 	cdata->sgls.supported = 1;
300 	cdata->sgls.keyed_sgl = 1;
301 	cdata->sgls.sgl_offset = 1;
302 	cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
303 	cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16;
304 	cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16;
305 	cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */
306 	cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC;
307 	cdata->nvmf_specific.msdbd = 1;
308 
309 	if (transport->ops->cdata_init) {
310 		transport->ops->cdata_init(transport, subsystem, cdata);
311 	}
312 }
313 
314 static bool
315 nvmf_subsys_has_multi_iocs(struct spdk_nvmf_subsystem *subsystem)
316 {
317 	struct spdk_nvmf_ns *ns;
318 	uint32_t i;
319 
320 	for (i = 0; i < subsystem->max_nsid; i++) {
321 		ns = subsystem->ns[i];
322 		if (ns && ns->bdev && spdk_bdev_is_zoned(ns->bdev)) {
323 			return true;
324 		}
325 	}
326 	return false;
327 }
328 
329 static struct spdk_nvmf_ctrlr *
330 nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
331 		  struct spdk_nvmf_request *req,
332 		  struct spdk_nvmf_fabric_connect_cmd *connect_cmd,
333 		  struct spdk_nvmf_fabric_connect_data *connect_data)
334 {
335 	struct spdk_nvmf_ctrlr *ctrlr;
336 	struct spdk_nvmf_transport *transport = req->qpair->transport;
337 	struct spdk_nvme_transport_id listen_trid = {};
338 	bool subsys_has_multi_iocs = false;
339 
340 	ctrlr = calloc(1, sizeof(*ctrlr));
341 	if (ctrlr == NULL) {
342 		SPDK_ERRLOG("Memory allocation failed\n");
343 		return NULL;
344 	}
345 
346 	if (spdk_nvme_trtype_is_fabrics(transport->ops->type)) {
347 		ctrlr->dynamic_ctrlr = true;
348 	} else {
349 		ctrlr->cntlid = connect_data->cntlid;
350 	}
351 
352 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_create, ctrlr, subsystem->subnqn,
353 			   spdk_thread_get_id(req->qpair->group->thread));
354 
355 	STAILQ_INIT(&ctrlr->async_events);
356 	TAILQ_INIT(&ctrlr->log_head);
357 	ctrlr->subsys = subsystem;
358 	ctrlr->thread = req->qpair->group->thread;
359 	ctrlr->disconnect_in_progress = false;
360 
361 	ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr);
362 	if (!ctrlr->qpair_mask) {
363 		SPDK_ERRLOG("Failed to allocate controller qpair mask\n");
364 		goto err_qpair_mask;
365 	}
366 
367 	nvmf_ctrlr_cdata_init(transport, subsystem, &ctrlr->cdata);
368 
369 	/*
370 	 * KAS: This field indicates the granularity of the Keep Alive Timer in 100ms units.
371 	 * If this field is cleared to 0h, then Keep Alive is not supported.
372 	 */
373 	if (ctrlr->cdata.kas) {
374 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
375 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
376 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
377 	}
378 
379 	ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
380 	if (ctrlr->subsys->flags.ana_reporting) {
381 		ctrlr->feat.async_event_configuration.bits.ana_change_notice = 1;
382 	}
383 	ctrlr->feat.volatile_write_cache.bits.wce = 1;
384 	/* Coalescing Disable */
385 	ctrlr->feat.interrupt_vector_configuration.bits.cd = 1;
386 
387 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
388 		/*
389 		 * If keep-alive timeout is not set, discovery controllers use some
390 		 * arbitrary high value in order to cleanup stale discovery sessions
391 		 *
392 		 * From the 1.0a nvme-of spec:
393 		 * "The Keep Alive command is reserved for
394 		 * Discovery controllers. A transport may specify a
395 		 * fixed Discovery controller activity timeout value
396 		 * (e.g., 2 minutes). If no commands are received
397 		 * by a Discovery controller within that time
398 		 * period, the controller may perform the
399 		 * actions for Keep Alive Timer expiration".
400 		 *
401 		 * From the 1.1 nvme-of spec:
402 		 * "A host requests an explicit persistent connection
403 		 * to a Discovery controller and Asynchronous Event Notifications from
404 		 * the Discovery controller on that persistent connection by specifying
405 		 * a non-zero Keep Alive Timer value in the Connect command."
406 		 *
407 		 * In case non-zero KATO is used, we enable discovery_log_change_notice
408 		 * otherwise we disable it and use default discovery controller KATO.
409 		 * KATO is in millisecond.
410 		 */
411 		if (ctrlr->feat.keep_alive_timer.bits.kato == 0) {
412 			ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
413 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 0;
414 		} else {
415 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 1;
416 		}
417 	}
418 
419 	/* Subtract 1 for admin queue, 1 for 0's based */
420 	ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
421 			1;
422 	ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 -
423 			1;
424 
425 	spdk_uuid_copy(&ctrlr->hostid, (struct spdk_uuid *)connect_data->hostid);
426 	memcpy(ctrlr->hostnqn, connect_data->hostnqn, sizeof(ctrlr->hostnqn));
427 
428 	ctrlr->vcprop.cap.raw = 0;
429 	ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */
430 	ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth -
431 				      1; /* max queue depth */
432 	ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */
433 	/* ready timeout - 500 msec units */
434 	ctrlr->vcprop.cap.bits.to = NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS / 500;
435 	ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */
436 	subsys_has_multi_iocs = nvmf_subsys_has_multi_iocs(subsystem);
437 	if (subsys_has_multi_iocs) {
438 		ctrlr->vcprop.cap.bits.css =
439 			SPDK_NVME_CAP_CSS_IOCS; /* One or more I/O command sets supported */
440 	} else {
441 		ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */
442 	}
443 
444 	ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */
445 	ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */
446 
447 	/* Version Supported: 1.3 */
448 	ctrlr->vcprop.vs.bits.mjr = 1;
449 	ctrlr->vcprop.vs.bits.mnr = 3;
450 	ctrlr->vcprop.vs.bits.ter = 0;
451 
452 	ctrlr->vcprop.cc.raw = 0;
453 	ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */
454 	if (subsys_has_multi_iocs) {
455 		ctrlr->vcprop.cc.bits.css =
456 			SPDK_NVME_CC_CSS_IOCS; /* All supported I/O Command Sets */
457 	}
458 
459 	ctrlr->vcprop.csts.raw = 0;
460 	ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */
461 
462 	SPDK_DEBUGLOG(nvmf, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw);
463 	SPDK_DEBUGLOG(nvmf, "vs 0x%x\n", ctrlr->vcprop.vs.raw);
464 	SPDK_DEBUGLOG(nvmf, "cc 0x%x\n", ctrlr->vcprop.cc.raw);
465 	SPDK_DEBUGLOG(nvmf, "csts 0x%x\n", ctrlr->vcprop.csts.raw);
466 
467 	ctrlr->dif_insert_or_strip = transport->opts.dif_insert_or_strip;
468 
469 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_NVME) {
470 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
471 			SPDK_ERRLOG("Could not get listener transport ID\n");
472 			goto err_listener;
473 		}
474 
475 		ctrlr->listener = nvmf_subsystem_find_listener(ctrlr->subsys, &listen_trid);
476 		if (!ctrlr->listener) {
477 			SPDK_ERRLOG("Listener was not found\n");
478 			goto err_listener;
479 		}
480 	}
481 
482 	req->qpair->ctrlr = ctrlr;
483 	spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_add_ctrlr, req);
484 
485 	return ctrlr;
486 err_listener:
487 	spdk_bit_array_free(&ctrlr->qpair_mask);
488 err_qpair_mask:
489 	free(ctrlr);
490 	return NULL;
491 }
492 
493 static void
494 _nvmf_ctrlr_destruct(void *ctx)
495 {
496 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
497 	struct spdk_nvmf_reservation_log *log, *log_tmp;
498 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
499 
500 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_destruct, ctrlr, ctrlr->subsys->subnqn,
501 			   spdk_thread_get_id(ctrlr->thread));
502 
503 	assert(spdk_get_thread() == ctrlr->thread);
504 	assert(ctrlr->in_destruct);
505 
506 	SPDK_DEBUGLOG(nvmf, "Destroy ctrlr 0x%hx\n", ctrlr->cntlid);
507 	if (ctrlr->disconnect_in_progress) {
508 		SPDK_ERRLOG("freeing ctrlr with disconnect in progress\n");
509 		spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
510 		return;
511 	}
512 
513 	nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
514 	nvmf_ctrlr_stop_association_timer(ctrlr);
515 	spdk_bit_array_free(&ctrlr->qpair_mask);
516 
517 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
518 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
519 		free(log);
520 	}
521 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
522 		STAILQ_REMOVE(&ctrlr->async_events, event, spdk_nvmf_async_event_completion, link);
523 		free(event);
524 	}
525 	free(ctrlr);
526 }
527 
528 void
529 nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
530 {
531 	nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);
532 
533 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
534 }
535 
536 static void
537 nvmf_ctrlr_add_io_qpair(void *ctx)
538 {
539 	struct spdk_nvmf_request *req = ctx;
540 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
541 	struct spdk_nvmf_qpair *qpair = req->qpair;
542 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
543 	struct spdk_nvmf_qpair *admin_qpair = ctrlr->admin_qpair;
544 
545 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_io_qpair, ctrlr, req->qpair, req->qpair->qid,
546 			   spdk_thread_get_id(ctrlr->thread));
547 
548 	/* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect.
549 	  * For error case, the value should be NULL. So set it to NULL at first.
550 	  */
551 	qpair->ctrlr = NULL;
552 
553 	/* Make sure the controller is not being destroyed. */
554 	if (ctrlr->in_destruct) {
555 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
556 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
557 		goto end;
558 	}
559 
560 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
561 		SPDK_ERRLOG("I/O connect not allowed on discovery controller\n");
562 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
563 		goto end;
564 	}
565 
566 	if (!ctrlr->vcprop.cc.bits.en) {
567 		SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n");
568 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
569 		goto end;
570 	}
571 
572 	if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) {
573 		SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n",
574 			    ctrlr->vcprop.cc.bits.iosqes);
575 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
576 		goto end;
577 	}
578 
579 	if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) {
580 		SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n",
581 			    ctrlr->vcprop.cc.bits.iocqes);
582 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
583 		goto end;
584 	}
585 
586 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
587 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
588 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
589 		 * state to DEACTIVATING and removing it from poll group */
590 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
591 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
592 		goto end;
593 	}
594 
595 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
596 end:
597 	spdk_nvmf_request_complete(req);
598 }
599 
600 static void
601 _nvmf_ctrlr_add_io_qpair(void *ctx)
602 {
603 	struct spdk_nvmf_request *req = ctx;
604 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
605 	struct spdk_nvmf_fabric_connect_data *data;
606 	struct spdk_nvmf_ctrlr *ctrlr;
607 	struct spdk_nvmf_qpair *qpair = req->qpair;
608 	struct spdk_nvmf_qpair *admin_qpair;
609 	struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
610 	struct spdk_nvmf_subsystem *subsystem;
611 	struct spdk_nvme_transport_id listen_trid = {};
612 	const struct spdk_nvmf_subsystem_listener *listener;
613 
614 	assert(req->iovcnt == 1);
615 
616 	data = req->iov[0].iov_base;
617 
618 	SPDK_DEBUGLOG(nvmf, "Connect I/O Queue for controller id 0x%x\n", data->cntlid);
619 
620 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
621 	/* We already checked this in spdk_nvmf_ctrlr_connect */
622 	assert(subsystem != NULL);
623 
624 	ctrlr = nvmf_subsystem_get_ctrlr(subsystem, data->cntlid);
625 	if (ctrlr == NULL) {
626 		SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid);
627 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
628 		spdk_nvmf_request_complete(req);
629 		return;
630 	}
631 
632 	/* fail before passing a message to the controller thread. */
633 	if (ctrlr->in_destruct) {
634 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
635 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
636 		spdk_nvmf_request_complete(req);
637 		return;
638 	}
639 
640 	/* If ANA reporting is enabled, check if I/O connect is on the same listener. */
641 	if (subsystem->flags.ana_reporting) {
642 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
643 			SPDK_ERRLOG("Could not get listener transport ID\n");
644 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
645 			spdk_nvmf_request_complete(req);
646 			return;
647 		}
648 
649 		listener = nvmf_subsystem_find_listener(subsystem, &listen_trid);
650 		if (listener != ctrlr->listener) {
651 			SPDK_ERRLOG("I/O connect is on a listener different from admin connect\n");
652 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
653 			spdk_nvmf_request_complete(req);
654 			return;
655 		}
656 	}
657 
658 	admin_qpair = ctrlr->admin_qpair;
659 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
660 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
661 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
662 		 * state to DEACTIVATING and removing it from poll group */
663 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
664 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
665 		spdk_nvmf_request_complete(req);
666 		return;
667 	}
668 	qpair->ctrlr = ctrlr;
669 	spdk_thread_send_msg(admin_qpair->group->thread, nvmf_ctrlr_add_io_qpair, req);
670 }
671 
672 static bool
673 nvmf_qpair_access_allowed(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_subsystem *subsystem,
674 			  const char *hostnqn)
675 {
676 	struct spdk_nvme_transport_id listen_trid = {};
677 
678 	if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
679 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subsystem->subnqn, hostnqn);
680 		return false;
681 	}
682 
683 	if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) {
684 		SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n",
685 			    subsystem->subnqn);
686 		return false;
687 	}
688 
689 	if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) {
690 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n",
691 			    subsystem->subnqn, hostnqn);
692 		return false;
693 	}
694 
695 	return true;
696 }
697 
698 static int
699 _nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
700 {
701 	struct spdk_nvmf_fabric_connect_data *data = req->iov[0].iov_base;
702 	struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd;
703 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
704 	struct spdk_nvmf_qpair *qpair = req->qpair;
705 	struct spdk_nvmf_transport *transport = qpair->transport;
706 	struct spdk_nvmf_ctrlr *ctrlr;
707 	struct spdk_nvmf_subsystem *subsystem;
708 
709 	SPDK_DEBUGLOG(nvmf, "recfmt 0x%x qid %u sqsize %u\n",
710 		      cmd->recfmt, cmd->qid, cmd->sqsize);
711 
712 	SPDK_DEBUGLOG(nvmf, "Connect data:\n");
713 	SPDK_DEBUGLOG(nvmf, "  cntlid:  0x%04x\n", data->cntlid);
714 	SPDK_DEBUGLOG(nvmf, "  hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n",
715 		      ntohl(*(uint32_t *)&data->hostid[0]),
716 		      ntohs(*(uint16_t *)&data->hostid[4]),
717 		      ntohs(*(uint16_t *)&data->hostid[6]),
718 		      data->hostid[8],
719 		      data->hostid[9],
720 		      ntohs(*(uint16_t *)&data->hostid[10]),
721 		      ntohl(*(uint32_t *)&data->hostid[12]));
722 	SPDK_DEBUGLOG(nvmf, "  subnqn: \"%s\"\n", data->subnqn);
723 	SPDK_DEBUGLOG(nvmf, "  hostnqn: \"%s\"\n", data->hostnqn);
724 
725 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
726 	if (!subsystem) {
727 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
728 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
729 	}
730 
731 	if (cmd->recfmt != 0) {
732 		SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt);
733 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
734 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT;
735 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
736 	}
737 
738 	/*
739 	 * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and
740 	 * strictly less than max_aq_depth (admin queues) or max_queue_depth (io queues).
741 	 */
742 	if (cmd->sqsize == 0) {
743 		SPDK_ERRLOG("Invalid SQSIZE = 0\n");
744 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
745 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
746 	}
747 
748 	if (cmd->qid == 0) {
749 		if (cmd->sqsize >= transport->opts.max_aq_depth) {
750 			SPDK_ERRLOG("Invalid SQSIZE for admin queue %u (min 1, max %u)\n",
751 				    cmd->sqsize, transport->opts.max_aq_depth - 1);
752 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
753 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
754 		}
755 	} else if (cmd->sqsize >= transport->opts.max_queue_depth) {
756 		SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n",
757 			    cmd->sqsize, transport->opts.max_queue_depth - 1);
758 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
759 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
760 	}
761 
762 	qpair->sq_head_max = cmd->sqsize;
763 	qpair->qid = cmd->qid;
764 	qpair->connect_received = true;
765 
766 	pthread_mutex_lock(&qpair->group->mutex);
767 	qpair->group->current_unassociated_qpairs--;
768 	pthread_mutex_unlock(&qpair->group->mutex);
769 
770 	if (0 == qpair->qid) {
771 		qpair->group->stat.admin_qpairs++;
772 		qpair->group->stat.current_admin_qpairs++;
773 	} else {
774 		qpair->group->stat.io_qpairs++;
775 		qpair->group->stat.current_io_qpairs++;
776 	}
777 
778 	if (cmd->qid == 0) {
779 		SPDK_DEBUGLOG(nvmf, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid);
780 
781 		if (spdk_nvme_trtype_is_fabrics(transport->ops->type) && data->cntlid != 0xFFFF) {
782 			/* This NVMf target only supports dynamic mode. */
783 			SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid);
784 			SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
785 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
786 		}
787 
788 		/* Establish a new ctrlr */
789 		ctrlr = nvmf_ctrlr_create(subsystem, req, cmd, data);
790 		if (!ctrlr) {
791 			SPDK_ERRLOG("nvmf_ctrlr_create() failed\n");
792 			rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
793 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
794 		} else {
795 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
796 		}
797 	} else {
798 		spdk_thread_send_msg(subsystem->thread, _nvmf_ctrlr_add_io_qpair, req);
799 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
800 	}
801 }
802 
803 static inline bool
804 nvmf_request_is_fabric_connect(struct spdk_nvmf_request *req)
805 {
806 	return req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
807 	       req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT;
808 }
809 
810 static struct spdk_nvmf_subsystem_poll_group *
811 nvmf_subsystem_pg_from_connect_cmd(struct spdk_nvmf_request *req)
812 {
813 	struct spdk_nvmf_fabric_connect_data *data;
814 	struct spdk_nvmf_subsystem *subsystem;
815 	struct spdk_nvmf_tgt *tgt;
816 
817 	assert(nvmf_request_is_fabric_connect(req));
818 	assert(req->qpair->ctrlr == NULL);
819 	assert(req->iovcnt == 1);
820 
821 	data = req->iov[0].iov_base;
822 	tgt = req->qpair->transport->tgt;
823 
824 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
825 	if (subsystem == NULL) {
826 		return NULL;
827 	}
828 
829 	return &req->qpair->group->sgroups[subsystem->id];
830 }
831 
832 int
833 spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
834 {
835 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
836 	struct spdk_nvmf_subsystem_poll_group *sgroup;
837 	struct spdk_nvmf_qpair *qpair = req->qpair;
838 	enum spdk_nvmf_request_exec_status status;
839 
840 	if (req->iovcnt > 1) {
841 		SPDK_ERRLOG("Connect command invalid iovcnt: %d\n", req->iovcnt);
842 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
843 		status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
844 		goto out;
845 	}
846 
847 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
848 	if (!sgroup) {
849 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
850 		status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
851 		goto out;
852 	}
853 
854 	sgroup->mgmt_io_outstanding++;
855 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
856 
857 	status = _nvmf_ctrlr_connect(req);
858 
859 out:
860 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
861 		_nvmf_request_complete(req);
862 	}
863 
864 	return status;
865 }
866 
867 static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req);
868 
869 static int
870 retry_connect(void *arg)
871 {
872 	struct spdk_nvmf_request *req = arg;
873 	struct spdk_nvmf_subsystem_poll_group *sgroup;
874 	int rc;
875 
876 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
877 	assert(sgroup != NULL);
878 	sgroup->mgmt_io_outstanding++;
879 	spdk_poller_unregister(&req->poller);
880 	rc = nvmf_ctrlr_cmd_connect(req);
881 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
882 		_nvmf_request_complete(req);
883 	}
884 	return SPDK_POLLER_BUSY;
885 }
886 
887 static int
888 nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
889 {
890 	struct spdk_nvmf_fabric_connect_data *data = req->iov[0].iov_base;
891 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
892 	struct spdk_nvmf_transport *transport = req->qpair->transport;
893 	struct spdk_nvmf_subsystem *subsystem;
894 
895 	if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) {
896 		SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length);
897 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
898 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
899 	}
900 
901 	if (req->iovcnt > 1) {
902 		SPDK_ERRLOG("Connect command invalid iovcnt: %d\n", req->iovcnt);
903 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
904 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
905 	}
906 
907 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
908 	if (!subsystem) {
909 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
910 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
911 	}
912 
913 	if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
914 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
915 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
916 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
917 		struct spdk_nvmf_subsystem_poll_group *sgroup;
918 
919 		if (req->timeout_tsc == 0) {
920 			/* We will only retry the request up to 1 second. */
921 			req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz();
922 		} else if (spdk_get_ticks() > req->timeout_tsc) {
923 			SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn);
924 			rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
925 			rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
926 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
927 		}
928 
929 		/* Subsystem is not ready to handle a connect. Use a poller to retry it
930 		 * again later. Decrement the mgmt_io_outstanding to avoid the
931 		 * subsystem waiting for this command to complete before unpausing.
932 		 */
933 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
934 		assert(sgroup != NULL);
935 		sgroup->mgmt_io_outstanding--;
936 		SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn);
937 		req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100);
938 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
939 	}
940 
941 	/* Ensure that hostnqn is null terminated */
942 	if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
943 		SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
944 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn);
945 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
946 	}
947 
948 	if (!nvmf_qpair_access_allowed(req->qpair, subsystem, data->hostnqn)) {
949 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
950 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
951 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
952 	}
953 
954 	return _nvmf_ctrlr_connect(req);
955 }
956 
957 static int
958 nvmf_ctrlr_association_remove(void *ctx)
959 {
960 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
961 	int rc;
962 
963 	nvmf_ctrlr_stop_association_timer(ctrlr);
964 
965 	if (ctrlr->in_destruct) {
966 		return SPDK_POLLER_IDLE;
967 	}
968 	SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n",
969 		      ctrlr->subsys->subnqn);
970 
971 	if (ctrlr->admin_qpair) {
972 		rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
973 		if (rc < 0) {
974 			SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
975 			assert(false);
976 		}
977 	}
978 
979 	return SPDK_POLLER_BUSY;
980 }
981 
982 static int
983 _nvmf_ctrlr_cc_reset_shn_done(void *ctx)
984 {
985 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
986 	uint64_t now = spdk_get_ticks();
987 	uint32_t count;
988 
989 	if (ctrlr->cc_timer) {
990 		spdk_poller_unregister(&ctrlr->cc_timer);
991 	}
992 
993 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
994 	SPDK_DEBUGLOG(nvmf, "ctrlr %p active queue count %u\n", ctrlr, count);
995 
996 	if (count > 1) {
997 		if (now < ctrlr->cc_timeout_tsc) {
998 			/* restart cc timer */
999 			ctrlr->cc_timer = SPDK_POLLER_REGISTER(_nvmf_ctrlr_cc_reset_shn_done, ctrlr, 100 * 1000);
1000 			return SPDK_POLLER_IDLE;
1001 		} else {
1002 			/* controller fatal status */
1003 			SPDK_WARNLOG("IO timeout, ctrlr %p is in fatal status\n", ctrlr);
1004 			nvmf_ctrlr_set_fatal_status(ctrlr);
1005 		}
1006 	}
1007 
1008 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
1009 
1010 	if (ctrlr->disconnect_is_shn) {
1011 		ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE;
1012 		ctrlr->disconnect_is_shn = false;
1013 	} else {
1014 		/* Only a subset of the registers are cleared out on a reset */
1015 		ctrlr->vcprop.cc.raw = 0;
1016 		ctrlr->vcprop.csts.raw = 0;
1017 	}
1018 
1019 	/* After CC.EN transitions to 0 (due to shutdown or reset), the association
1020 	 * between the host and controller shall be preserved for at least 2 minutes */
1021 	if (ctrlr->association_timer) {
1022 		SPDK_DEBUGLOG(nvmf, "Association timer already set\n");
1023 		nvmf_ctrlr_stop_association_timer(ctrlr);
1024 	}
1025 	if (ctrlr->association_timeout) {
1026 		ctrlr->association_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_association_remove, ctrlr,
1027 					   ctrlr->association_timeout * 1000);
1028 	}
1029 	ctrlr->disconnect_in_progress = false;
1030 	return SPDK_POLLER_BUSY;
1031 }
1032 
/*
 * Completion callback for the spdk_for_each_channel() iteration started by a
 * CC reset/shutdown. Logs (and asserts on) a negative iteration status, then
 * runs the shared completion logic in _nvmf_ctrlr_cc_reset_shn_done().
 */
static void
nvmf_ctrlr_cc_reset_shn_done(struct spdk_io_channel_iter *i, int status)
{
	void *ctx = spdk_io_channel_iter_get_ctx(i);

	if (status < 0) {
		SPDK_ERRLOG("Fail to disconnect io ctrlr qpairs\n");
		assert(false);
	}

	_nvmf_ctrlr_cc_reset_shn_done(ctx);
}
1045 
/*
 * Completion callback for the bdev resets issued by nvmf_ctrlr_cc_timeout():
 * logs the outcome and releases the bdev I/O. cb_arg is unused.
 */
static void
nvmf_bdev_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	const char *outcome;

	if (success) {
		outcome = "success";
	} else {
		outcome = "failure";
	}

	SPDK_NOTICELOG("Resetting bdev done with %s\n", outcome);

	spdk_bdev_free_io(bdev_io);
}
1053 
1054 
1055 static int
1056 nvmf_ctrlr_cc_timeout(void *ctx)
1057 {
1058 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
1059 	struct spdk_nvmf_poll_group *group = ctrlr->admin_qpair->group;
1060 	struct spdk_nvmf_ns *ns;
1061 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1062 
1063 	assert(group != NULL && group->sgroups != NULL);
1064 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
1065 	SPDK_DEBUGLOG(nvmf, "Ctrlr %p reset or shutdown timeout\n", ctrlr);
1066 
1067 	for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
1068 	     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1069 		if (ns->bdev == NULL) {
1070 			continue;
1071 		}
1072 		ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[ns->opts.nsid - 1];
1073 		SPDK_NOTICELOG("Ctrlr %p resetting NSID %u\n", ctrlr, ns->opts.nsid);
1074 		spdk_bdev_reset(ns->desc, ns_info->channel, nvmf_bdev_complete_reset, NULL);
1075 	}
1076 
1077 	return SPDK_POLLER_BUSY;
1078 }
1079 
1080 const struct spdk_nvmf_registers *
1081 spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr)
1082 {
1083 	return &ctrlr->vcprop;
1084 }
1085 
/* Mark the controller as fatally failed by setting the CFS bit in its CSTS property. */
void
nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr)
{
	ctrlr->vcprop.csts.bits.cfs = 1;
}
1091 
1092 static uint64_t
1093 nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr)
1094 {
1095 	return ctrlr->vcprop.cap.raw;
1096 }
1097 
1098 static uint64_t
1099 nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr)
1100 {
1101 	return ctrlr->vcprop.vs.raw;
1102 }
1103 
1104 static uint64_t
1105 nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr)
1106 {
1107 	return ctrlr->vcprop.cc.raw;
1108 }
1109 
1110 static bool
1111 nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1112 {
1113 	union spdk_nvme_cc_register cc, diff;
1114 	uint32_t cc_timeout_ms;
1115 
1116 	cc.raw = value;
1117 
1118 	SPDK_DEBUGLOG(nvmf, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw);
1119 	SPDK_DEBUGLOG(nvmf, "new CC: 0x%08x\n", cc.raw);
1120 
1121 	/*
1122 	 * Calculate which bits changed between the current and new CC.
1123 	 * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed.
1124 	 */
1125 	diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw;
1126 
1127 	if (diff.bits.en) {
1128 		if (cc.bits.en) {
1129 			SPDK_DEBUGLOG(nvmf, "Property Set CC Enable!\n");
1130 			nvmf_ctrlr_stop_association_timer(ctrlr);
1131 
1132 			ctrlr->vcprop.cc.bits.en = 1;
1133 			ctrlr->vcprop.csts.bits.rdy = 1;
1134 		} else {
1135 			SPDK_DEBUGLOG(nvmf, "Property Set CC Disable!\n");
1136 			if (ctrlr->disconnect_in_progress) {
1137 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1138 				return true;
1139 			}
1140 
1141 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1142 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1143 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1144 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1145 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1146 
1147 			ctrlr->vcprop.cc.bits.en = 0;
1148 			ctrlr->disconnect_in_progress = true;
1149 			ctrlr->disconnect_is_shn = false;
1150 			spdk_for_each_channel(ctrlr->subsys->tgt,
1151 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1152 					      ctrlr,
1153 					      nvmf_ctrlr_cc_reset_shn_done);
1154 		}
1155 		diff.bits.en = 0;
1156 	}
1157 
1158 	if (diff.bits.shn) {
1159 		if (cc.bits.shn == SPDK_NVME_SHN_NORMAL ||
1160 		    cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
1161 			SPDK_DEBUGLOG(nvmf, "Property Set CC Shutdown %u%ub!\n",
1162 				      cc.bits.shn >> 1, cc.bits.shn & 1);
1163 			if (ctrlr->disconnect_in_progress) {
1164 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1165 				return true;
1166 			}
1167 
1168 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1169 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1170 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1171 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1172 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1173 
1174 			ctrlr->vcprop.cc.bits.shn = cc.bits.shn;
1175 			ctrlr->disconnect_in_progress = true;
1176 			ctrlr->disconnect_is_shn = true;
1177 			spdk_for_each_channel(ctrlr->subsys->tgt,
1178 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1179 					      ctrlr,
1180 					      nvmf_ctrlr_cc_reset_shn_done);
1181 
1182 			/* From the time a shutdown is initiated the controller shall disable
1183 			 * Keep Alive timer */
1184 			nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
1185 		} else if (cc.bits.shn == 0) {
1186 			ctrlr->vcprop.cc.bits.shn = 0;
1187 		} else {
1188 			SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n",
1189 				    cc.bits.shn >> 1, cc.bits.shn & 1);
1190 			return false;
1191 		}
1192 		diff.bits.shn = 0;
1193 	}
1194 
1195 	if (diff.bits.iosqes) {
1196 		SPDK_DEBUGLOG(nvmf, "Prop Set IOSQES = %u (%u bytes)\n",
1197 			      cc.bits.iosqes, 1u << cc.bits.iosqes);
1198 		ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes;
1199 		diff.bits.iosqes = 0;
1200 	}
1201 
1202 	if (diff.bits.iocqes) {
1203 		SPDK_DEBUGLOG(nvmf, "Prop Set IOCQES = %u (%u bytes)\n",
1204 			      cc.bits.iocqes, 1u << cc.bits.iocqes);
1205 		ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes;
1206 		diff.bits.iocqes = 0;
1207 	}
1208 
1209 	if (diff.bits.ams) {
1210 		SPDK_ERRLOG("Arbitration Mechanism Selected (AMS) 0x%x not supported!\n", cc.bits.ams);
1211 		return false;
1212 	}
1213 
1214 	if (diff.bits.mps) {
1215 		SPDK_ERRLOG("Memory Page Size (MPS) %u KiB not supported!\n", (1 << (2 + cc.bits.mps)));
1216 		return false;
1217 	}
1218 
1219 	if (diff.bits.css) {
1220 		if (cc.bits.css > SPDK_NVME_CC_CSS_IOCS) {
1221 			SPDK_ERRLOG("I/O Command Set Selected (CSS) 0x%x not supported!\n", cc.bits.css);
1222 			return false;
1223 		}
1224 		diff.bits.css = 0;
1225 	}
1226 
1227 	if (diff.raw != 0) {
1228 		/* Print an error message, but don't fail the command in this case.
1229 		 * If we did want to fail in this case, we'd need to ensure we acted
1230 		 * on no other bits or the initiator gets confused. */
1231 		SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw);
1232 	}
1233 
1234 	return true;
1235 }
1236 
1237 static uint64_t
1238 nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr)
1239 {
1240 	return ctrlr->vcprop.csts.raw;
1241 }
1242 
1243 static uint64_t
1244 nvmf_prop_get_aqa(struct spdk_nvmf_ctrlr *ctrlr)
1245 {
1246 	return ctrlr->vcprop.aqa.raw;
1247 }
1248 
1249 static bool
1250 nvmf_prop_set_aqa(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1251 {
1252 	union spdk_nvme_aqa_register aqa;
1253 
1254 	aqa.raw = value;
1255 
1256 	/*
1257 	 * We don't need to explicitly check for maximum size, as the fields are
1258 	 * limited to 12 bits (4096).
1259 	 */
1260 	if (aqa.bits.asqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1261 	    aqa.bits.acqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1262 	    aqa.bits.reserved1 != 0 || aqa.bits.reserved2 != 0) {
1263 		return false;
1264 	}
1265 
1266 	ctrlr->vcprop.aqa.raw = value;
1267 
1268 	return true;
1269 }
1270 
1271 static uint64_t
1272 nvmf_prop_get_asq(struct spdk_nvmf_ctrlr *ctrlr)
1273 {
1274 	return ctrlr->vcprop.asq;
1275 }
1276 
1277 static bool
1278 nvmf_prop_set_asq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1279 {
1280 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & (0xFFFFFFFFULL << 32ULL)) | value;
1281 
1282 	return true;
1283 }
1284 
1285 static bool
1286 nvmf_prop_set_asq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1287 {
1288 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1289 
1290 	return true;
1291 }
1292 
1293 static uint64_t
1294 nvmf_prop_get_acq(struct spdk_nvmf_ctrlr *ctrlr)
1295 {
1296 	return ctrlr->vcprop.acq;
1297 }
1298 
1299 static bool
1300 nvmf_prop_set_acq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1301 {
1302 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & (0xFFFFFFFFULL << 32ULL)) | value;
1303 
1304 	return true;
1305 }
1306 
1307 static bool
1308 nvmf_prop_set_acq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1309 {
1310 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1311 
1312 	return true;
1313 }
1314 
/*
 * Descriptor for one controller property reachable through the fabric
 * Property Get / Property Set commands. Instances are built with the PROP()
 * macro, which relies on the field order below.
 */
struct nvmf_prop {
	/* Byte offset of the property within struct spdk_nvme_registers. */
	uint32_t ofst;
	/* Size of the property in bytes (4 or 8 for the entries in nvmf_props). */
	uint8_t size;
	/* Stringified register field name, used in debug logs. */
	char name[11];
	/* Returns the current property value; 4-byte properties are widened to 64 bits. */
	uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr);
	/* Writes a 32-bit value (the lower half for 8-byte properties); NULL if not writable. */
	bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
	/* Writes the upper 32 bits of an 8-byte property; NULL otherwise. */
	bool (*set_upper_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
};
1323 
/*
 * Positional initializer for a struct nvmf_prop entry: the offset is taken
 * from the named field of struct spdk_nvme_registers and the field name is
 * stringified into the entry's name.
 */
#define PROP(field, size, get_cb, set_cb, set_upper_cb) \
	{ \
		offsetof(struct spdk_nvme_registers, field), \
		size, \
		#field, \
		get_cb, set_cb, set_upper_cb \
	}
1331 
/*
 * Table of the properties supported over the fabric, searched linearly by
 * find_prop(). Entries without a set callback (cap, vs, csts) are read-only;
 * only the 8-byte writable properties (asq, acq) provide an upper-half setter.
 */
static const struct nvmf_prop nvmf_props[] = {
	PROP(cap,  8, nvmf_prop_get_cap,  NULL,                    NULL),
	PROP(vs,   4, nvmf_prop_get_vs,   NULL,                    NULL),
	PROP(cc,   4, nvmf_prop_get_cc,   nvmf_prop_set_cc,        NULL),
	PROP(csts, 4, nvmf_prop_get_csts, NULL,                    NULL),
	PROP(aqa,  4, nvmf_prop_get_aqa,  nvmf_prop_set_aqa,       NULL),
	PROP(asq,  8, nvmf_prop_get_asq,  nvmf_prop_set_asq_lower, nvmf_prop_set_asq_upper),
	PROP(acq,  8, nvmf_prop_get_acq,  nvmf_prop_set_acq_lower, nvmf_prop_set_acq_upper),
};
1341 
1342 static const struct nvmf_prop *
1343 find_prop(uint32_t ofst, uint8_t size)
1344 {
1345 	size_t i;
1346 
1347 	for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) {
1348 		const struct nvmf_prop *prop = &nvmf_props[i];
1349 
1350 		if ((ofst >= prop->ofst) && (ofst + size <= prop->ofst + prop->size)) {
1351 			return prop;
1352 		}
1353 	}
1354 
1355 	return NULL;
1356 }
1357 
1358 static int
1359 nvmf_property_get(struct spdk_nvmf_request *req)
1360 {
1361 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1362 	struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd;
1363 	struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp;
1364 	const struct nvmf_prop *prop;
1365 	uint8_t size;
1366 
1367 	response->status.sc = 0;
1368 	response->value.u64 = 0;
1369 
1370 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x\n",
1371 		      cmd->attrib.size, cmd->ofst);
1372 
1373 	switch (cmd->attrib.size) {
1374 	case SPDK_NVMF_PROP_SIZE_4:
1375 		size = 4;
1376 		break;
1377 	case SPDK_NVMF_PROP_SIZE_8:
1378 		size = 8;
1379 		break;
1380 	default:
1381 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1382 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1383 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1384 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1385 	}
1386 
1387 	prop = find_prop(cmd->ofst, size);
1388 	if (prop == NULL || prop->get_cb == NULL) {
1389 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1390 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1391 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1392 	}
1393 
1394 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1395 
1396 	response->value.u64 = prop->get_cb(ctrlr);
1397 
1398 	if (size != prop->size) {
1399 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to read. */
1400 		assert(size == 4);
1401 		assert(prop->size == 8);
1402 
1403 		if (cmd->ofst == prop->ofst) {
1404 			/* Keep bottom 4 bytes only */
1405 			response->value.u64 &= 0xFFFFFFFF;
1406 		} else {
1407 			/* Keep top 4 bytes only */
1408 			response->value.u64 >>= 32;
1409 		}
1410 	}
1411 
1412 	SPDK_DEBUGLOG(nvmf, "response value: 0x%" PRIx64 "\n", response->value.u64);
1413 
1414 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1415 }
1416 
1417 static int
1418 nvmf_property_set(struct spdk_nvmf_request *req)
1419 {
1420 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1421 	struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
1422 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1423 	const struct nvmf_prop *prop;
1424 	uint64_t value;
1425 	uint8_t size;
1426 	bool ret;
1427 
1428 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
1429 		      cmd->attrib.size, cmd->ofst, cmd->value.u64);
1430 
1431 	switch (cmd->attrib.size) {
1432 	case SPDK_NVMF_PROP_SIZE_4:
1433 		size = 4;
1434 		break;
1435 	case SPDK_NVMF_PROP_SIZE_8:
1436 		size = 8;
1437 		break;
1438 	default:
1439 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1440 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1441 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1442 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1443 	}
1444 
1445 	prop = find_prop(cmd->ofst, size);
1446 	if (prop == NULL || prop->set_cb == NULL) {
1447 		SPDK_INFOLOG(nvmf, "Invalid offset 0x%x\n", cmd->ofst);
1448 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1449 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1450 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1451 	}
1452 
1453 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1454 
1455 	value = cmd->value.u64;
1456 
1457 	if (prop->size == 4) {
1458 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1459 	} else if (size != prop->size) {
1460 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. */
1461 		assert(size == 4);
1462 		assert(prop->size == 8);
1463 
1464 		if (cmd->ofst == prop->ofst) {
1465 			ret = prop->set_cb(ctrlr, (uint32_t)value);
1466 		} else {
1467 			ret = prop->set_upper_cb(ctrlr, (uint32_t)value);
1468 		}
1469 	} else {
1470 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1471 		if (ret) {
1472 			ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32));
1473 		}
1474 	}
1475 
1476 	if (!ret) {
1477 		SPDK_ERRLOG("prop set_cb failed\n");
1478 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1479 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1480 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1481 	}
1482 
1483 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1484 }
1485 
1486 static int
1487 nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req)
1488 {
1489 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1490 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1491 
1492 	SPDK_DEBUGLOG(nvmf, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11);
1493 
1494 	ctrlr->feat.arbitration.raw = cmd->cdw11;
1495 	ctrlr->feat.arbitration.bits.reserved = 0;
1496 
1497 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1498 }
1499 
1500 static int
1501 nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req)
1502 {
1503 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1504 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1505 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1506 
1507 	SPDK_DEBUGLOG(nvmf, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11);
1508 
1509 	/* Only PS = 0 is allowed, since we report NPSS = 0 */
1510 	if (cmd->cdw11_bits.feat_power_management.bits.ps != 0) {
1511 		SPDK_ERRLOG("Invalid power state %u\n", cmd->cdw11_bits.feat_power_management.bits.ps);
1512 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1513 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1514 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1515 	}
1516 
1517 	ctrlr->feat.power_management.raw = cmd->cdw11;
1518 	ctrlr->feat.power_management.bits.reserved = 0;
1519 
1520 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1521 }
1522 
1523 static bool
1524 temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts)
1525 {
1526 	/*
1527 	 * Valid TMPSEL values:
1528 	 *  0000b - 1000b: temperature sensors
1529 	 *  1111b: set all implemented temperature sensors
1530 	 */
1531 	if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) {
1532 		/* 1001b - 1110b: reserved */
1533 		SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel);
1534 		return false;
1535 	}
1536 
1537 	/*
1538 	 * Valid THSEL values:
1539 	 *  00b: over temperature threshold
1540 	 *  01b: under temperature threshold
1541 	 */
1542 	if (opts->bits.thsel > 1) {
1543 		/* 10b - 11b: reserved */
1544 		SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel);
1545 		return false;
1546 	}
1547 
1548 	return true;
1549 }
1550 
1551 static int
1552 nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req)
1553 {
1554 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1555 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1556 
1557 	SPDK_DEBUGLOG(nvmf, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1558 
1559 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1560 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1561 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1562 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1563 	}
1564 
1565 	/* TODO: no sensors implemented - ignore new values */
1566 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1567 }
1568 
1569 static int
1570 nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req)
1571 {
1572 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1573 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1574 
1575 	SPDK_DEBUGLOG(nvmf, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1576 
1577 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1578 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1579 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1580 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1581 	}
1582 
1583 	/* TODO: no sensors implemented - return 0 for all thresholds */
1584 	rsp->cdw0 = 0;
1585 
1586 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1587 }
1588 
1589 static int
1590 nvmf_ctrlr_get_features_interrupt_vector_configuration(struct spdk_nvmf_request *req)
1591 {
1592 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1593 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1594 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1595 	union spdk_nvme_feat_interrupt_vector_configuration iv_conf = {};
1596 
1597 	SPDK_DEBUGLOG(nvmf, "Get Features - Interrupt Vector Configuration (cdw11 = 0x%0x)\n", cmd->cdw11);
1598 
1599 	iv_conf.bits.iv = cmd->cdw11_bits.feat_interrupt_vector_configuration.bits.iv;
1600 	iv_conf.bits.cd = ctrlr->feat.interrupt_vector_configuration.bits.cd;
1601 	rsp->cdw0 = iv_conf.raw;
1602 
1603 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1604 }
1605 
1606 static int
1607 nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req)
1608 {
1609 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1610 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1611 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1612 
1613 	SPDK_DEBUGLOG(nvmf, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11);
1614 
1615 	if (cmd->cdw11_bits.feat_error_recovery.bits.dulbe) {
1616 		/*
1617 		 * Host is not allowed to set this bit, since we don't advertise it in
1618 		 * Identify Namespace.
1619 		 */
1620 		SPDK_ERRLOG("Host set unsupported DULBE bit\n");
1621 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1622 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1623 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1624 	}
1625 
1626 	ctrlr->feat.error_recovery.raw = cmd->cdw11;
1627 	ctrlr->feat.error_recovery.bits.reserved = 0;
1628 
1629 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1630 }
1631 
1632 static int
1633 nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req)
1634 {
1635 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1636 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1637 
1638 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11);
1639 
1640 	ctrlr->feat.volatile_write_cache.raw = cmd->cdw11;
1641 	ctrlr->feat.volatile_write_cache.bits.reserved = 0;
1642 
1643 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache %s\n",
1644 		      ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled");
1645 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1646 }
1647 
1648 static int
1649 nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req)
1650 {
1651 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1652 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1653 
1654 	SPDK_DEBUGLOG(nvmf, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11);
1655 
1656 	ctrlr->feat.write_atomicity.raw = cmd->cdw11;
1657 	ctrlr->feat.write_atomicity.bits.reserved = 0;
1658 
1659 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1660 }
1661 
1662 static int
1663 nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req)
1664 {
1665 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1666 
1667 	SPDK_ERRLOG("Set Features - Host Identifier not allowed\n");
1668 	response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1669 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1670 }
1671 
1672 static int
1673 nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req)
1674 {
1675 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1676 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1677 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1678 	struct spdk_iov_xfer ix;
1679 
1680 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Identifier\n");
1681 
1682 	if (!cmd->cdw11_bits.feat_host_identifier.bits.exhid) {
1683 		/* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */
1684 		SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n");
1685 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1686 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1687 	}
1688 
1689 	if (req->iovcnt < 1 || req->length < sizeof(ctrlr->hostid)) {
1690 		SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n");
1691 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1692 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1693 	}
1694 
1695 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
1696 	spdk_iov_xfer_from_buf(&ix, &ctrlr->hostid, sizeof(ctrlr->hostid));
1697 
1698 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1699 }
1700 
1701 static int
1702 nvmf_ctrlr_get_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1703 {
1704 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1705 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1706 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1707 	struct spdk_nvmf_ns *ns;
1708 
1709 	SPDK_DEBUGLOG(nvmf, "get Features - Reservation Notification Mask\n");
1710 
1711 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1712 		SPDK_ERRLOG("get Features - Invalid Namespace ID\n");
1713 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1714 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1715 	}
1716 
1717 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1718 	if (ns == NULL) {
1719 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1720 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1721 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1722 	}
1723 	rsp->cdw0 = ns->mask;
1724 
1725 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1726 }
1727 
1728 static int
1729 nvmf_ctrlr_set_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1730 {
1731 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1732 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
1733 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1734 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1735 	struct spdk_nvmf_ns *ns;
1736 
1737 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Notification Mask\n");
1738 
1739 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1740 		for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
1741 		     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
1742 			ns->mask = cmd->cdw11;
1743 		}
1744 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1745 	}
1746 
1747 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1748 	if (ns == NULL) {
1749 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1750 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1751 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1752 	}
1753 	ns->mask = cmd->cdw11;
1754 
1755 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1756 }
1757 
1758 static int
1759 nvmf_ctrlr_get_features_reservation_persistence(struct spdk_nvmf_request *req)
1760 {
1761 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1762 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1763 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1764 	struct spdk_nvmf_ns *ns;
1765 
1766 	SPDK_DEBUGLOG(nvmf, "Get Features - Reservation Persistence\n");
1767 
1768 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1769 	/* NSID with SPDK_NVME_GLOBAL_NS_TAG (=0xffffffff) also included */
1770 	if (ns == NULL) {
1771 		SPDK_ERRLOG("Get Features - Invalid Namespace ID\n");
1772 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1773 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1774 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1775 	}
1776 
1777 	response->cdw0 = ns->ptpl_activated;
1778 
1779 	response->status.sct = SPDK_NVME_SCT_GENERIC;
1780 	response->status.sc = SPDK_NVME_SC_SUCCESS;
1781 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1782 }
1783 
1784 static int
1785 nvmf_ctrlr_set_features_reservation_persistence(struct spdk_nvmf_request *req)
1786 {
1787 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1788 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1789 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1790 	struct spdk_nvmf_ns *ns;
1791 	bool ptpl;
1792 
1793 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Persistence\n");
1794 
1795 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1796 	ptpl = cmd->cdw11_bits.feat_rsv_persistence.bits.ptpl;
1797 
1798 	if (cmd->nsid != SPDK_NVME_GLOBAL_NS_TAG && ns && ns->ptpl_file) {
1799 		ns->ptpl_activated = ptpl;
1800 	} else if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1801 		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns && ns->ptpl_file;
1802 		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1803 			ns->ptpl_activated = ptpl;
1804 		}
1805 	} else {
1806 		SPDK_ERRLOG("Set Features - Invalid Namespace ID or Reservation Configuration\n");
1807 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1808 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1809 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1810 	}
1811 
1812 	/* TODO: Feature not changeable for now */
1813 	response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1814 	response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
1815 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1816 }
1817 
1818 static int
1819 nvmf_ctrlr_get_features_host_behavior_support(struct spdk_nvmf_request *req)
1820 {
1821 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1822 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1823 	struct spdk_nvme_host_behavior host_behavior = {};
1824 	struct spdk_iov_xfer ix;
1825 
1826 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Behavior Support\n");
1827 
1828 	if (req->iovcnt < 1 || req->length < sizeof(struct spdk_nvme_host_behavior)) {
1829 		SPDK_ERRLOG("invalid data buffer for Host Behavior Support\n");
1830 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1831 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1832 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1833 	}
1834 
1835 	host_behavior.acre = ctrlr->acre_enabled;
1836 
1837 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
1838 	spdk_iov_xfer_from_buf(&ix, &host_behavior, sizeof(host_behavior));
1839 
1840 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1841 }
1842 
1843 static int
1844 nvmf_ctrlr_set_features_host_behavior_support(struct spdk_nvmf_request *req)
1845 {
1846 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1847 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1848 	struct spdk_nvme_host_behavior *host_behavior;
1849 
1850 	SPDK_DEBUGLOG(nvmf, "Set Features - Host Behavior Support\n");
1851 	if (req->iovcnt != 1) {
1852 		SPDK_ERRLOG("Host Behavior Support invalid iovcnt: %d\n", req->iovcnt);
1853 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1854 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1855 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1856 	}
1857 	if (req->iov[0].iov_len != sizeof(struct spdk_nvme_host_behavior)) {
1858 		SPDK_ERRLOG("Host Behavior Support invalid iov_len: %zd\n", req->iov[0].iov_len);
1859 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1860 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1861 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1862 	}
1863 
1864 	host_behavior = (struct spdk_nvme_host_behavior *)req->iov[0].iov_base;
1865 	if (host_behavior->acre == 0) {
1866 		ctrlr->acre_enabled = false;
1867 	} else if (host_behavior->acre == 1) {
1868 		ctrlr->acre_enabled = true;
1869 	} else {
1870 		SPDK_ERRLOG("Host Behavior Support invalid acre: 0x%02x\n", host_behavior->acre);
1871 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1872 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1873 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1874 	}
1875 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1876 }
1877 
1878 static int
1879 nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
1880 {
1881 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1882 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1883 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1884 
1885 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);
1886 
1887 	/*
1888 	 * if attempts to disable keep alive by setting kato to 0h
1889 	 * a status value of keep alive invalid shall be returned
1890 	 */
1891 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato == 0) {
1892 		rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
1893 	} else if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
1894 		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
1895 	} else {
1896 		/* round up to milliseconds */
1897 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(
1898 					cmd->cdw11_bits.feat_keep_alive_timer.bits.kato,
1899 					KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
1900 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
1901 	}
1902 
1903 	/*
1904 	 * if change the keep alive timeout value successfully
1905 	 * update the keep alive poller.
1906 	 */
1907 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato != 0) {
1908 		if (ctrlr->keep_alive_poller != NULL) {
1909 			spdk_poller_unregister(&ctrlr->keep_alive_poller);
1910 		}
1911 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
1912 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
1913 	}
1914 
1915 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer set to %u ms\n",
1916 		      ctrlr->feat.keep_alive_timer.bits.kato);
1917 
1918 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1919 }
1920 
1921 static int
1922 nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
1923 {
1924 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1925 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1926 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1927 	uint32_t count;
1928 
1929 	SPDK_DEBUGLOG(nvmf, "Set Features - Number of Queues, cdw11 0x%x\n",
1930 		      req->cmd->nvme_cmd.cdw11);
1931 
1932 	if (cmd->cdw11_bits.feat_num_of_queues.bits.ncqr == UINT16_MAX ||
1933 	    cmd->cdw11_bits.feat_num_of_queues.bits.nsqr == UINT16_MAX) {
1934 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1935 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1936 	}
1937 
1938 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
1939 	/* verify that the controller is ready to process commands */
1940 	if (count > 1) {
1941 		SPDK_DEBUGLOG(nvmf, "Queue pairs already active!\n");
1942 		rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1943 	} else {
1944 		/*
1945 		 * Ignore the value requested by the host -
1946 		 * always return the pre-configured value based on max_qpairs_allowed.
1947 		 */
1948 		rsp->cdw0 = ctrlr->feat.number_of_queues.raw;
1949 	}
1950 
1951 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1952 }
1953 
1954 SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr) == 4920,
1955 		   "Please check migration fields that need to be added or not");
1956 
1957 static void
1958 nvmf_ctrlr_migr_data_copy(struct spdk_nvmf_ctrlr_migr_data *data,
1959 			  const struct spdk_nvmf_ctrlr_migr_data *data_src, size_t data_size)
1960 {
1961 	assert(data);
1962 	assert(data_src);
1963 	assert(data_size);
1964 
1965 	memcpy(&data->regs, &data_src->regs, spdk_min(data->regs_size, data_src->regs_size));
1966 	memcpy(&data->feat, &data_src->feat, spdk_min(data->feat_size, data_src->feat_size));
1967 
1968 #define SET_FIELD(field) \
1969     if (offsetof(struct spdk_nvmf_ctrlr_migr_data, field) + sizeof(data->field) <= data_size) { \
1970         data->field = data_src->field; \
1971     } \
1972 
1973 	SET_FIELD(cntlid);
1974 	SET_FIELD(acre);
1975 	SET_FIELD(num_aer_cids);
1976 	SET_FIELD(num_async_events);
1977 	SET_FIELD(notice_aen_mask);
1978 #undef SET_FIELD
1979 
1980 #define SET_ARRAY(arr) \
1981     if (offsetof(struct spdk_nvmf_ctrlr_migr_data, arr) + sizeof(data->arr) <= data_size) { \
1982         memcpy(&data->arr, &data_src->arr, sizeof(data->arr)); \
1983     } \
1984 
1985 	SET_ARRAY(async_events);
1986 	SET_ARRAY(aer_cids);
1987 #undef SET_ARRAY
1988 }
1989 
1990 int
1991 spdk_nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
1992 			       struct spdk_nvmf_ctrlr_migr_data *data)
1993 {
1994 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
1995 	uint32_t i;
1996 	struct spdk_nvmf_ctrlr_migr_data data_local = {
1997 		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
1998 		.regs_size = sizeof(struct spdk_nvmf_registers),
1999 		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
2000 	};
2001 
2002 	assert(data->data_size <= sizeof(data_local));
2003 	assert(spdk_get_thread() == ctrlr->thread);
2004 
2005 	memcpy(&data_local.regs, &ctrlr->vcprop, sizeof(struct spdk_nvmf_registers));
2006 	memcpy(&data_local.feat, &ctrlr->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
2007 
2008 	data_local.cntlid = ctrlr->cntlid;
2009 	data_local.acre = ctrlr->acre_enabled;
2010 	data_local.num_aer_cids = ctrlr->nr_aer_reqs;
2011 
2012 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
2013 		if (data_local.num_async_events + 1 > SPDK_NVMF_MIGR_MAX_PENDING_AERS) {
2014 			SPDK_ERRLOG("ctrlr %p has too many pending AERs\n", ctrlr);
2015 			break;
2016 		}
2017 
2018 		data_local.async_events[data_local.num_async_events++].raw = event->event.raw;
2019 	}
2020 
2021 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
2022 		struct spdk_nvmf_request *req = ctrlr->aer_req[i];
2023 		data_local.aer_cids[i] = req->cmd->nvme_cmd.cid;
2024 	}
2025 	data_local.notice_aen_mask = ctrlr->notice_aen_mask;
2026 
2027 	nvmf_ctrlr_migr_data_copy(data, &data_local, spdk_min(data->data_size, data_local.data_size));
2028 	return 0;
2029 }
2030 
2031 int
2032 spdk_nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
2033 				  const struct spdk_nvmf_ctrlr_migr_data *data)
2034 {
2035 	uint32_t i;
2036 	struct spdk_nvmf_ctrlr_migr_data data_local = {
2037 		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
2038 		.regs_size = sizeof(struct spdk_nvmf_registers),
2039 		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
2040 	};
2041 
2042 	assert(data->data_size <= sizeof(data_local));
2043 	assert(spdk_get_thread() == ctrlr->thread);
2044 
2045 	/* local version of data should have defaults set before copy */
2046 	nvmf_ctrlr_migr_data_copy(&data_local, data, spdk_min(data->data_size, data_local.data_size));
2047 	memcpy(&ctrlr->vcprop, &data_local.regs, sizeof(struct spdk_nvmf_registers));
2048 	memcpy(&ctrlr->feat, &data_local.feat, sizeof(struct spdk_nvmf_ctrlr_feat));
2049 
2050 	ctrlr->cntlid = data_local.cntlid;
2051 	ctrlr->acre_enabled = data_local.acre;
2052 
2053 	for (i = 0; i < data_local.num_async_events; i++) {
2054 		struct spdk_nvmf_async_event_completion *event;
2055 
2056 		event = calloc(1, sizeof(*event));
2057 		if (!event) {
2058 			return -ENOMEM;
2059 		}
2060 
2061 		event->event.raw = data_local.async_events[i].raw;
2062 		STAILQ_INSERT_TAIL(&ctrlr->async_events, event, link);
2063 	}
2064 	ctrlr->notice_aen_mask = data_local.notice_aen_mask;
2065 
2066 	return 0;
2067 }
2068 
2069 static int
2070 nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req)
2071 {
2072 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2073 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2074 
2075 	SPDK_DEBUGLOG(nvmf, "Set Features - Async Event Configuration, cdw11 0x%08x\n",
2076 		      cmd->cdw11);
2077 	ctrlr->feat.async_event_configuration.raw = cmd->cdw11;
2078 	ctrlr->feat.async_event_configuration.bits.reserved1 = 0;
2079 	ctrlr->feat.async_event_configuration.bits.reserved2 = 0;
2080 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2081 }
2082 
2083 static int
2084 nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
2085 {
2086 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2087 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
2088 	struct spdk_nvmf_async_event_completion *pending_event;
2089 
2090 	SPDK_DEBUGLOG(nvmf, "Async Event Request\n");
2091 
2092 	/* Four asynchronous events are supported for now */
2093 	if (ctrlr->nr_aer_reqs >= SPDK_NVMF_MAX_ASYNC_EVENTS) {
2094 		SPDK_DEBUGLOG(nvmf, "AERL exceeded\n");
2095 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
2096 		rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
2097 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2098 	}
2099 
2100 	if (!STAILQ_EMPTY(&ctrlr->async_events)) {
2101 		pending_event = STAILQ_FIRST(&ctrlr->async_events);
2102 		rsp->cdw0 = pending_event->event.raw;
2103 		STAILQ_REMOVE(&ctrlr->async_events, pending_event, spdk_nvmf_async_event_completion, link);
2104 		free(pending_event);
2105 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2106 	}
2107 
2108 	ctrlr->aer_req[ctrlr->nr_aer_reqs++] = req;
2109 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
2110 }
2111 
2112 static void
2113 nvmf_get_firmware_slot_log_page(struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length)
2114 {
2115 	struct spdk_nvme_firmware_page fw_page;
2116 	size_t copy_len;
2117 	struct spdk_iov_xfer ix;
2118 
2119 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2120 
2121 	memset(&fw_page, 0, sizeof(fw_page));
2122 	fw_page.afi.active_slot = 1;
2123 	fw_page.afi.next_reset_slot = 0;
2124 	spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' ');
2125 
2126 	if (offset < sizeof(fw_page)) {
2127 		copy_len = spdk_min(sizeof(fw_page) - offset, length);
2128 		if (copy_len > 0) {
2129 			spdk_iov_xfer_from_buf(&ix, (const char *)&fw_page + offset, copy_len);
2130 		}
2131 	}
2132 }
2133 
/*
 * Asynchronous Event Mask Bit
 *
 * Bit positions used with a controller's notice_aen_mask.
 */
enum spdk_nvme_async_event_mask_bit {
	/* Mask Namespace Change Notification */
	SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT		= 0,
	/* Mask Asymmetric Namespace Access Change Notification */
	SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT		= 1,
	/* Mask Discovery Log Change Notification */
	SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT	= 2,
	/* Mask Reservation Log Page Available Notification */
	SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT	= 3,
	/* Mask Error Event */
	SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT			= 4,
	/* 5 - 63 Reserved */
};
2150 
2151 static inline void
2152 nvmf_ctrlr_unmask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2153 		      enum spdk_nvme_async_event_mask_bit mask)
2154 {
2155 	ctrlr->notice_aen_mask &= ~(1 << mask);
2156 }
2157 
2158 static inline bool
2159 nvmf_ctrlr_mask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2160 		    enum spdk_nvme_async_event_mask_bit mask)
2161 {
2162 	if (ctrlr->notice_aen_mask & (1 << mask)) {
2163 		return false;
2164 	} else {
2165 		ctrlr->notice_aen_mask |= (1 << mask);
2166 		return true;
2167 	}
2168 }
2169 
2170 /* we have to use the typedef in the function declaration to appease astyle. */
2171 typedef enum spdk_nvme_ana_state spdk_nvme_ana_state_t;
2172 
2173 static inline spdk_nvme_ana_state_t
2174 nvmf_ctrlr_get_ana_state(struct spdk_nvmf_ctrlr *ctrlr, uint32_t anagrpid)
2175 {
2176 	if (!ctrlr->subsys->flags.ana_reporting) {
2177 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2178 	}
2179 
2180 	if (spdk_unlikely(ctrlr->listener == NULL)) {
2181 		return SPDK_NVME_ANA_INACCESSIBLE_STATE;
2182 	}
2183 
2184 	assert(anagrpid - 1 < ctrlr->subsys->max_nsid);
2185 	return ctrlr->listener->ana_state[anagrpid - 1];
2186 }
2187 
2188 static spdk_nvme_ana_state_t
2189 nvmf_ctrlr_get_ana_state_from_nsid(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2190 {
2191 	struct spdk_nvmf_ns *ns;
2192 
2193 	/* We do not have NVM subsystem specific ANA state. Hence if NSID is either
2194 	 * SPDK_NVMF_GLOBAL_NS_TAG, invalid, or for inactive namespace, return
2195 	 * the optimized state.
2196 	 */
2197 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
2198 	if (ns == NULL) {
2199 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2200 	}
2201 
2202 	return nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2203 }
2204 
2205 static void
2206 nvmf_get_error_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2207 			uint64_t offset, uint32_t length, uint32_t rae)
2208 {
2209 	if (!rae) {
2210 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT);
2211 	}
2212 
2213 	/* TODO: actually fill out log page data */
2214 }
2215 
2216 static void
2217 nvmf_get_ana_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2218 		      uint64_t offset, uint32_t length, uint32_t rae)
2219 {
2220 	struct spdk_nvme_ana_page ana_hdr;
2221 	struct spdk_nvme_ana_group_descriptor ana_desc;
2222 	size_t copy_len, copied_len;
2223 	uint32_t num_anagrp = 0, anagrpid;
2224 	struct spdk_nvmf_ns *ns;
2225 	struct spdk_iov_xfer ix;
2226 
2227 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2228 
2229 	if (length == 0) {
2230 		goto done;
2231 	}
2232 
2233 	if (offset >= sizeof(ana_hdr)) {
2234 		offset -= sizeof(ana_hdr);
2235 	} else {
2236 		for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
2237 			if (ctrlr->subsys->ana_group[anagrpid - 1] > 0) {
2238 				num_anagrp++;
2239 			}
2240 		}
2241 
2242 		memset(&ana_hdr, 0, sizeof(ana_hdr));
2243 
2244 		ana_hdr.num_ana_group_desc = num_anagrp;
2245 		/* TODO: Support Change Count. */
2246 		ana_hdr.change_count = 0;
2247 
2248 		copy_len = spdk_min(sizeof(ana_hdr) - offset, length);
2249 		copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ana_hdr + offset, copy_len);
2250 		assert(copied_len == copy_len);
2251 		length -= copied_len;
2252 		offset = 0;
2253 	}
2254 
2255 	if (length == 0) {
2256 		goto done;
2257 	}
2258 
2259 	for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
2260 		if (ctrlr->subsys->ana_group[anagrpid - 1] == 0) {
2261 			continue;
2262 		}
2263 
2264 		if (offset >= sizeof(ana_desc)) {
2265 			offset -= sizeof(ana_desc);
2266 		} else {
2267 			memset(&ana_desc, 0, sizeof(ana_desc));
2268 
2269 			ana_desc.ana_group_id = anagrpid;
2270 			ana_desc.num_of_nsid = ctrlr->subsys->ana_group[anagrpid - 1];
2271 			ana_desc.ana_state = nvmf_ctrlr_get_ana_state(ctrlr, anagrpid);
2272 
2273 			copy_len = spdk_min(sizeof(ana_desc) - offset, length);
2274 			copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ana_desc + offset,
2275 							    copy_len);
2276 			assert(copied_len == copy_len);
2277 			length -= copied_len;
2278 			offset = 0;
2279 
2280 			if (length == 0) {
2281 				goto done;
2282 			}
2283 		}
2284 
2285 		/* TODO: Revisit here about O(n^2) cost if we have subsystem with
2286 		 * many namespaces in the future.
2287 		 */
2288 		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
2289 		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
2290 			if (ns->anagrpid != anagrpid) {
2291 				continue;
2292 			}
2293 
2294 			if (offset >= sizeof(uint32_t)) {
2295 				offset -= sizeof(uint32_t);
2296 				continue;
2297 			}
2298 
2299 			copy_len = spdk_min(sizeof(uint32_t) - offset, length);
2300 			copied_len = spdk_iov_xfer_from_buf(&ix, (const char *)&ns->nsid + offset,
2301 							    copy_len);
2302 			assert(copied_len == copy_len);
2303 			length -= copied_len;
2304 			offset = 0;
2305 
2306 			if (length == 0) {
2307 				goto done;
2308 			}
2309 		}
2310 	}
2311 
2312 done:
2313 	if (!rae) {
2314 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT);
2315 	}
2316 }
2317 
2318 void
2319 nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2320 {
2321 	uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list);
2322 	uint16_t i;
2323 	bool found = false;
2324 
2325 	for (i = 0; i < ctrlr->changed_ns_list_count; i++) {
2326 		if (ctrlr->changed_ns_list.ns_list[i] == nsid) {
2327 			/* nsid is already in the list */
2328 			found = true;
2329 			break;
2330 		}
2331 	}
2332 
2333 	if (!found) {
2334 		if (ctrlr->changed_ns_list_count == max_changes) {
2335 			/* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. */
2336 			ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu;
2337 			for (i = 1; i < max_changes; i++) {
2338 				ctrlr->changed_ns_list.ns_list[i] = 0;
2339 			}
2340 		} else {
2341 			ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid;
2342 		}
2343 	}
2344 }
2345 
2346 static void
2347 nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2348 				  struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2349 {
2350 	size_t copy_length;
2351 	struct spdk_iov_xfer ix;
2352 
2353 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2354 
2355 	if (offset < sizeof(ctrlr->changed_ns_list)) {
2356 		copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset);
2357 		if (copy_length) {
2358 			spdk_iov_xfer_from_buf(&ix, (char *)&ctrlr->changed_ns_list + offset, copy_length);
2359 		}
2360 	}
2361 
2362 	/* Clear log page each time it is read */
2363 	ctrlr->changed_ns_list_count = 0;
2364 	memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list));
2365 
2366 	if (!rae) {
2367 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT);
2368 	}
2369 }
2370 
2371 /* The structure can be modified if we provide support for other commands in future */
2372 static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = {
2373 	.admin_cmds_supported = {
2374 		/* CSUPP, LBCC, NCC, NIC, CCC, CSE */
2375 		/* Get Log Page */
2376 		[SPDK_NVME_OPC_GET_LOG_PAGE]		= {1, 0, 0, 0, 0, 0, 0, 0},
2377 		/* Identify */
2378 		[SPDK_NVME_OPC_IDENTIFY]		= {1, 0, 0, 0, 0, 0, 0, 0},
2379 		/* Abort */
2380 		[SPDK_NVME_OPC_ABORT]			= {1, 0, 0, 0, 0, 0, 0, 0},
2381 		/* Set Features */
2382 		[SPDK_NVME_OPC_SET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
2383 		/* Get Features */
2384 		[SPDK_NVME_OPC_GET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
2385 		/* Async Event Request */
2386 		[SPDK_NVME_OPC_ASYNC_EVENT_REQUEST]	= {1, 0, 0, 0, 0, 0, 0, 0},
2387 		/* Keep Alive */
2388 		[SPDK_NVME_OPC_KEEP_ALIVE]		= {1, 0, 0, 0, 0, 0, 0, 0},
2389 	},
2390 	.io_cmds_supported = {
2391 		/* FLUSH */
2392 		[SPDK_NVME_OPC_FLUSH]			= {1, 1, 0, 0, 0, 0, 0, 0},
2393 		/* WRITE */
2394 		[SPDK_NVME_OPC_WRITE]			= {1, 1, 0, 0, 0, 0, 0, 0},
2395 		/* READ */
2396 		[SPDK_NVME_OPC_READ]			= {1, 0, 0, 0, 0, 0, 0, 0},
2397 		/* WRITE ZEROES */
2398 		[SPDK_NVME_OPC_WRITE_ZEROES]		= {1, 1, 0, 0, 0, 0, 0, 0},
2399 		/* DATASET MANAGEMENT */
2400 		[SPDK_NVME_OPC_DATASET_MANAGEMENT]	= {1, 1, 0, 0, 0, 0, 0, 0},
2401 		/* COMPARE */
2402 		[SPDK_NVME_OPC_COMPARE]			= {1, 0, 0, 0, 0, 0, 0, 0},
2403 		/* ZONE MANAGEMENT SEND */
2404 		[SPDK_NVME_OPC_ZONE_MGMT_SEND]		= {1, 1, 0, 0, 0, 0, 0, 0},
2405 		/* ZONE MANAGEMENT RECEIVE */
2406 		[SPDK_NVME_OPC_ZONE_MGMT_RECV]		= {1, 0, 0, 0, 0, 0, 0, 0},
2407 	},
2408 };
2409 
2410 static void
2411 nvmf_get_cmds_and_effects_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2412 				   uint64_t offset, uint32_t length)
2413 {
2414 	uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page);
2415 	size_t copy_len = 0;
2416 	struct spdk_nvme_cmds_and_effect_log_page cmds_and_effect_log_page = g_cmds_and_effect_log_page;
2417 	struct spdk_nvme_cmds_and_effect_entry csupp_and_lbcc_effect_entry = {1, 1, 0, 0, 0, 0, 0, 0};
2418 	struct spdk_iov_xfer ix;
2419 
2420 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2421 
2422 	if (offset < page_size) {
2423 		if (ctrlr->subsys->zone_append_supported) {
2424 			cmds_and_effect_log_page.io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND] =
2425 				csupp_and_lbcc_effect_entry;
2426 		}
2427 		copy_len = spdk_min(page_size - offset, length);
2428 		spdk_iov_xfer_from_buf(&ix, (char *)(&cmds_and_effect_log_page) + offset, copy_len);
2429 	}
2430 }
2431 
2432 static void
2433 nvmf_get_reservation_notification_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2434 		struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2435 {
2436 	uint32_t unit_log_len, avail_log_len, next_pos, copy_len;
2437 	struct spdk_nvmf_reservation_log *log, *log_tmp;
2438 	struct spdk_iov_xfer ix;
2439 
2440 	spdk_iov_xfer_init(&ix, iovs, iovcnt);
2441 
2442 	unit_log_len = sizeof(struct spdk_nvme_reservation_notification_log);
2443 	/* No available log, return zeroed log pages */
2444 	if (!ctrlr->num_avail_log_pages) {
2445 		return;
2446 	}
2447 
2448 	avail_log_len = ctrlr->num_avail_log_pages * unit_log_len;
2449 	if (offset >= avail_log_len) {
2450 		return;
2451 	}
2452 
2453 	next_pos = 0;
2454 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
2455 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
2456 		ctrlr->num_avail_log_pages--;
2457 
2458 		next_pos += unit_log_len;
2459 		if (next_pos > offset) {
2460 			copy_len = spdk_min(next_pos - offset, length);
2461 			spdk_iov_xfer_from_buf(&ix, &log->log, copy_len);
2462 			length -= copy_len;
2463 			offset += copy_len;
2464 		}
2465 		free(log);
2466 
2467 		if (length == 0) {
2468 			break;
2469 		}
2470 	}
2471 
2472 	if (!rae) {
2473 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT);
2474 	}
2475 	return;
2476 }
2477 
2478 static int
2479 nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req)
2480 {
2481 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2482 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2483 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2484 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
2485 	struct spdk_nvme_transport_id cmd_source_trid;
2486 	uint64_t offset, len;
2487 	uint32_t rae, numdl, numdu;
2488 	uint8_t lid;
2489 
2490 	if (req->iovcnt < 1) {
2491 		SPDK_DEBUGLOG(nvmf, "get log command with no buffer\n");
2492 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2493 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2494 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2495 	}
2496 
2497 	offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32);
2498 	if (offset & 3) {
2499 		SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset);
2500 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2501 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2502 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2503 	}
2504 
2505 	rae = cmd->cdw10_bits.get_log_page.rae;
2506 	numdl = cmd->cdw10_bits.get_log_page.numdl;
2507 	numdu = cmd->cdw11_bits.get_log_page.numdu;
2508 	len = ((numdu << 16) + numdl + (uint64_t)1) * 4;
2509 	if (len > req->length) {
2510 		SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n",
2511 			    len, req->length);
2512 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2513 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2514 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2515 	}
2516 
2517 	lid = cmd->cdw10_bits.get_log_page.lid;
2518 	SPDK_DEBUGLOG(nvmf, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 " rae=%u\n",
2519 		      lid, offset, len, rae);
2520 
2521 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2522 		switch (lid) {
2523 		case SPDK_NVME_LOG_DISCOVERY:
2524 			if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &cmd_source_trid)) {
2525 				SPDK_ERRLOG("Failed to get LOG_DISCOVERY source trid\n");
2526 				response->status.sct = SPDK_NVME_SCT_GENERIC;
2527 				response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2528 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2529 			}
2530 			nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn, req->iov, req->iovcnt,
2531 						    offset, len, &cmd_source_trid);
2532 			if (!rae) {
2533 				nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT);
2534 			}
2535 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2536 		default:
2537 			goto invalid_log_page;
2538 		}
2539 	} else {
2540 		if (offset > len) {
2541 			SPDK_ERRLOG("Get log page: offset (%" PRIu64 ") > len (%" PRIu64 ")\n",
2542 				    offset, len);
2543 			response->status.sct = SPDK_NVME_SCT_GENERIC;
2544 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2545 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2546 		}
2547 
2548 		switch (lid) {
2549 		case SPDK_NVME_LOG_ERROR:
2550 			nvmf_get_error_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2551 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2552 		case SPDK_NVME_LOG_HEALTH_INFORMATION:
2553 			/* TODO: actually fill out log page data */
2554 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2555 		case SPDK_NVME_LOG_FIRMWARE_SLOT:
2556 			nvmf_get_firmware_slot_log_page(req->iov, req->iovcnt, offset, len);
2557 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2558 		case SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS:
2559 			if (subsystem->flags.ana_reporting) {
2560 				nvmf_get_ana_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2561 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2562 			} else {
2563 				goto invalid_log_page;
2564 			}
2565 		case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG:
2566 			nvmf_get_cmds_and_effects_log_page(ctrlr, req->iov, req->iovcnt, offset, len);
2567 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2568 		case SPDK_NVME_LOG_CHANGED_NS_LIST:
2569 			nvmf_get_changed_ns_list_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2570 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2571 		case SPDK_NVME_LOG_RESERVATION_NOTIFICATION:
2572 			nvmf_get_reservation_notification_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2573 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2574 		default:
2575 			goto invalid_log_page;
2576 		}
2577 	}
2578 
2579 invalid_log_page:
2580 	SPDK_INFOLOG(nvmf, "Unsupported Get Log Page 0x%02X\n", lid);
2581 	response->status.sct = SPDK_NVME_SCT_GENERIC;
2582 	response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2583 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2584 }
2585 
2586 static struct spdk_nvmf_ns *
2587 _nvmf_subsystem_get_ns_safe(struct spdk_nvmf_subsystem *subsystem,
2588 			    uint32_t nsid,
2589 			    struct spdk_nvme_cpl *rsp)
2590 {
2591 	struct spdk_nvmf_ns *ns;
2592 	if (nsid == 0 || nsid > subsystem->max_nsid) {
2593 		SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", nsid);
2594 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2595 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2596 		return NULL;
2597 	}
2598 
2599 	ns = _nvmf_subsystem_get_ns(subsystem, nsid);
2600 	if (ns == NULL || ns->bdev == NULL) {
2601 		/*
2602 		 * Inactive namespaces should return a zero filled data structure.
2603 		 * The data buffer is already zeroed by nvmf_ctrlr_process_admin_cmd(),
2604 		 * so we can just return early here.
2605 		 */
2606 		SPDK_DEBUGLOG(nvmf, "Identify Namespace for inactive NSID %u\n", nsid);
2607 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2608 		rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2609 		return NULL;
2610 	}
2611 	return ns;
2612 }
2613 
2614 int
2615 spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
2616 			    struct spdk_nvme_cmd *cmd,
2617 			    struct spdk_nvme_cpl *rsp,
2618 			    struct spdk_nvme_ns_data *nsdata)
2619 {
2620 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2621 	struct spdk_nvmf_ns *ns;
2622 	uint32_t max_num_blocks, format_index;
2623 	enum spdk_nvme_ana_state ana_state;
2624 
2625 	ns = _nvmf_subsystem_get_ns_safe(subsystem, cmd->nsid, rsp);
2626 	if (ns == NULL) {
2627 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2628 	}
2629 
2630 	nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
2631 
2632 	assert(ctrlr->admin_qpair);
2633 
2634 	format_index = spdk_nvme_ns_get_format_index(nsdata);
2635 
2636 	/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
2637 	max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
2638 			 (1U << nsdata->lbaf[format_index].lbads);
2639 	if (nsdata->noiob > max_num_blocks) {
2640 		nsdata->noiob = max_num_blocks;
2641 	}
2642 
2643 	/* Set NOWS equal to Controller MDTS */
2644 	if (nsdata->nsfeat.optperf) {
2645 		nsdata->nows = max_num_blocks - 1;
2646 	}
2647 
2648 	if (subsystem->flags.ana_reporting) {
2649 		assert(ns->anagrpid - 1 < subsystem->max_nsid);
2650 		nsdata->anagrpid = ns->anagrpid;
2651 
2652 		ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2653 		if (ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE ||
2654 		    ana_state == SPDK_NVME_ANA_PERSISTENT_LOSS_STATE) {
2655 			nsdata->nuse = 0;
2656 		}
2657 	}
2658 
2659 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2660 }
2661 
2662 static void
2663 nvmf_ctrlr_populate_oacs(struct spdk_nvmf_ctrlr *ctrlr,
2664 			 struct spdk_nvme_ctrlr_data *cdata)
2665 {
2666 	cdata->oacs = ctrlr->cdata.oacs;
2667 
2668 	cdata->oacs.virtualization_management =
2669 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT].hdlr != NULL;
2670 	cdata->oacs.nvme_mi = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_SEND].hdlr != NULL
2671 			      && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_RECEIVE].hdlr != NULL;
2672 	cdata->oacs.directives = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_SEND].hdlr != NULL
2673 				 && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_RECEIVE].hdlr != NULL;
2674 	cdata->oacs.device_self_test =
2675 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DEVICE_SELF_TEST].hdlr != NULL;
2676 	cdata->oacs.ns_manage = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_MANAGEMENT].hdlr != NULL
2677 				&& g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_ATTACHMENT].hdlr != NULL;
2678 	cdata->oacs.firmware = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD].hdlr !=
2679 			       NULL
2680 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_COMMIT].hdlr != NULL;
2681 	cdata->oacs.format =
2682 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FORMAT_NVM].hdlr != NULL;
2683 	cdata->oacs.security = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_SEND].hdlr != NULL
2684 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_RECEIVE].hdlr != NULL;
2685 	cdata->oacs.get_lba_status = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_GET_LBA_STATUS].hdlr !=
2686 				     NULL;
2687 }
2688 
2689 int
2690 spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
2691 {
2692 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2693 	struct spdk_nvmf_transport *transport;
2694 
2695 	/*
2696 	 * Common fields for discovery and NVM subsystems
2697 	 */
2698 	assert(ctrlr->admin_qpair);
2699 	transport = ctrlr->admin_qpair->transport;
2700 	spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
2701 	assert((transport->opts.max_io_size % 4096) == 0);
2702 	cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);
2703 	cdata->cntlid = ctrlr->cntlid;
2704 	cdata->ver = ctrlr->vcprop.vs;
2705 	cdata->aerl = ctrlr->cdata.aerl;
2706 	cdata->lpa.edlp = 1;
2707 	cdata->elpe = 127;
2708 	cdata->maxcmd = transport->opts.max_queue_depth;
2709 	cdata->sgls = ctrlr->cdata.sgls;
2710 	cdata->fuses = ctrlr->cdata.fuses;
2711 	cdata->acwu = 0; /* ACWU is 0-based. */
2712 	if (subsystem->flags.ana_reporting) {
2713 		cdata->mnan = subsystem->max_nsid;
2714 	}
2715 	spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0');
2716 
2717 	SPDK_DEBUGLOG(nvmf, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd);
2718 	SPDK_DEBUGLOG(nvmf, "sgls data: 0x%x\n", from_le32(&cdata->sgls));
2719 
2720 
2721 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2722 		/*
2723 		 * NVM Discovery subsystem fields
2724 		 */
2725 		cdata->oaes.discovery_log_change_notices = 1;
2726 	} else {
2727 		cdata->vid = ctrlr->cdata.vid;
2728 		cdata->ssvid = ctrlr->cdata.ssvid;
2729 		cdata->ieee[0] = ctrlr->cdata.ieee[0];
2730 		cdata->ieee[1] = ctrlr->cdata.ieee[1];
2731 		cdata->ieee[2] = ctrlr->cdata.ieee[2];
2732 
2733 		/*
2734 		 * NVM subsystem fields (reserved for discovery subsystems)
2735 		 */
2736 		spdk_strcpy_pad(cdata->mn, spdk_nvmf_subsystem_get_mn(subsystem), sizeof(cdata->mn), ' ');
2737 		spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
2738 		cdata->kas = ctrlr->cdata.kas;
2739 
2740 		cdata->rab = 6;
2741 		cdata->cmic.multi_port = 1;
2742 		cdata->cmic.multi_ctrlr = 1;
2743 		cdata->oaes.ns_attribute_notices = 1;
2744 		cdata->ctratt.host_id_exhid_supported = 1;
2745 		/* We do not have any actual limitation to the number of abort commands.
2746 		 * We follow the recommendation by the NVMe specification.
2747 		 */
2748 		cdata->acl = NVMF_ABORT_COMMAND_LIMIT;
2749 		cdata->frmw.slot1_ro = 1;
2750 		cdata->frmw.num_slots = 1;
2751 
2752 		cdata->lpa.celp = 1; /* Command Effects log page supported */
2753 
2754 		cdata->sqes.min = 6;
2755 		cdata->sqes.max = 6;
2756 		cdata->cqes.min = 4;
2757 		cdata->cqes.max = 4;
2758 		cdata->nn = subsystem->max_nsid;
2759 		cdata->vwc.present = 1;
2760 		cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED;
2761 
2762 		cdata->nvmf_specific = ctrlr->cdata.nvmf_specific;
2763 
2764 		cdata->oncs.compare = ctrlr->cdata.oncs.compare;
2765 		cdata->oncs.dsm = nvmf_ctrlr_dsm_supported(ctrlr);
2766 		cdata->oncs.write_zeroes = nvmf_ctrlr_write_zeroes_supported(ctrlr);
2767 		cdata->oncs.reservations = ctrlr->cdata.oncs.reservations;
2768 		cdata->oncs.copy = ctrlr->cdata.oncs.copy;
2769 		cdata->ocfs.copy_format0 = cdata->oncs.copy;
2770 		if (subsystem->flags.ana_reporting) {
2771 			/* Asymmetric Namespace Access Reporting is supported. */
2772 			cdata->cmic.ana_reporting = 1;
2773 			cdata->oaes.ana_change_notices = 1;
2774 
2775 			cdata->anatt = ANA_TRANSITION_TIME_IN_SEC;
2776 			/* ANA Change state is not used, and ANA Persistent Loss state
2777 			 * is not supported for now.
2778 			 */
2779 			cdata->anacap.ana_optimized_state = 1;
2780 			cdata->anacap.ana_non_optimized_state = 1;
2781 			cdata->anacap.ana_inaccessible_state = 1;
2782 			/* ANAGRPID does not change while namespace is attached to controller */
2783 			cdata->anacap.no_change_anagrpid = 1;
2784 			cdata->anagrpmax = subsystem->max_nsid;
2785 			cdata->nanagrpid = subsystem->max_nsid;
2786 		}
2787 
2788 		nvmf_ctrlr_populate_oacs(ctrlr, cdata);
2789 
2790 		assert(subsystem->tgt != NULL);
2791 		cdata->crdt[0] = subsystem->tgt->crdt[0];
2792 		cdata->crdt[1] = subsystem->tgt->crdt[1];
2793 		cdata->crdt[2] = subsystem->tgt->crdt[2];
2794 
2795 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ioccsz 0x%x\n",
2796 			      cdata->nvmf_specific.ioccsz);
2797 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: iorcsz 0x%x\n",
2798 			      cdata->nvmf_specific.iorcsz);
2799 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: icdoff 0x%x\n",
2800 			      cdata->nvmf_specific.icdoff);
2801 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ctrattr 0x%x\n",
2802 			      *(uint8_t *)&cdata->nvmf_specific.ctrattr);
2803 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: msdbd 0x%x\n",
2804 			      cdata->nvmf_specific.msdbd);
2805 	}
2806 
2807 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2808 }
2809 
2810 static int
2811 nvmf_ns_identify_iocs_zns(struct spdk_nvmf_ns *ns,
2812 			  struct spdk_nvme_cmd *cmd,
2813 			  struct spdk_nvme_cpl *rsp,
2814 			  struct spdk_nvme_zns_ns_data *nsdata_zns)
2815 {
2816 	nsdata_zns->zoc.variable_zone_capacity = 0;
2817 	nsdata_zns->zoc.zone_active_excursions = 0;
2818 	nsdata_zns->ozcs.read_across_zone_boundaries = 1;
2819 	/* Underflowing the zero based mar and mor bdev helper results in the correct
2820 	   value of FFFFFFFFh. */
2821 	nsdata_zns->mar = spdk_bdev_get_max_active_zones(ns->bdev) - 1;
2822 	nsdata_zns->mor = spdk_bdev_get_max_open_zones(ns->bdev) - 1;
2823 	nsdata_zns->rrl = 0;
2824 	nsdata_zns->frl = 0;
2825 	nsdata_zns->lbafe[0].zsze = spdk_bdev_get_zone_size(ns->bdev);
2826 
2827 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2828 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2829 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2830 }
2831 
2832 int
2833 spdk_nvmf_ns_identify_iocs_specific(struct spdk_nvmf_ctrlr *ctrlr,
2834 				    struct spdk_nvme_cmd *cmd,
2835 				    struct spdk_nvme_cpl *rsp,
2836 				    void *nsdata,
2837 				    size_t nsdata_size)
2838 {
2839 	uint8_t csi = cmd->cdw11_bits.identify.csi;
2840 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2841 	struct spdk_nvmf_ns *ns = _nvmf_subsystem_get_ns_safe(subsystem, cmd->nsid, rsp);
2842 
2843 	memset(nsdata, 0, nsdata_size);
2844 
2845 	if (ns == NULL) {
2846 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2847 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2848 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2849 	}
2850 
2851 	switch (csi) {
2852 	case SPDK_NVME_CSI_ZNS:
2853 		return nvmf_ns_identify_iocs_zns(ns, cmd, rsp, nsdata);
2854 	default:
2855 		break;
2856 	}
2857 
2858 	SPDK_DEBUGLOG(nvmf,
2859 		      "Returning zero filled struct for the iocs specific ns "
2860 		      "identify command and CSI 0x%02x\n",
2861 		      csi);
2862 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2863 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2864 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2865 }
2866 
2867 static int
2868 nvmf_ctrlr_identify_iocs_zns(struct spdk_nvmf_ctrlr *ctrlr,
2869 			     struct spdk_nvme_cmd *cmd,
2870 			     struct spdk_nvme_cpl *rsp,
2871 			     struct spdk_nvme_zns_ctrlr_data *cdata_zns)
2872 {
2873 	/* The unit of max_zone_append_size_kib is KiB.
2874 	The unit of zasl is the minimum memory page size
2875 	(2 ^ (12 + CAP.MPSMIN) KiB)
2876 	and is reported as a power of two (2^n). */
2877 	cdata_zns->zasl = spdk_u64log2(ctrlr->subsys->max_zone_append_size_kib >>
2878 				       (12 + ctrlr->vcprop.cap.bits.mpsmin));
2879 
2880 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2881 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2882 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2883 }
2884 
2885 int
2886 spdk_nvmf_ctrlr_identify_iocs_specific(struct spdk_nvmf_ctrlr *ctrlr,
2887 				       struct spdk_nvme_cmd *cmd,
2888 				       struct spdk_nvme_cpl *rsp,
2889 				       void *cdata,
2890 				       size_t cdata_size)
2891 {
2892 	uint8_t csi = cmd->cdw11_bits.identify.csi;
2893 
2894 	memset(cdata, 0, cdata_size);
2895 
2896 	switch (csi) {
2897 	case SPDK_NVME_CSI_ZNS:
2898 		return nvmf_ctrlr_identify_iocs_zns(ctrlr, cmd, rsp, cdata);
2899 	default:
2900 		break;
2901 	}
2902 
2903 	SPDK_DEBUGLOG(nvmf,
2904 		      "Returning zero filled struct for the iocs specific ctrlr "
2905 		      "identify command and CSI 0x%02x\n",
2906 		      csi);
2907 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2908 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2909 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2910 }
2911 
2912 static int
2913 nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem,
2914 				   struct spdk_nvme_cmd *cmd,
2915 				   struct spdk_nvme_cpl *rsp,
2916 				   struct spdk_nvme_ns_list *ns_list)
2917 {
2918 	struct spdk_nvmf_ns *ns;
2919 	uint32_t count = 0;
2920 
2921 	if (cmd->nsid >= 0xfffffffeUL) {
2922 		SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid);
2923 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2924 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2925 	}
2926 
2927 	memset(ns_list, 0, sizeof(*ns_list));
2928 
2929 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
2930 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
2931 		if (ns->opts.nsid <= cmd->nsid) {
2932 			continue;
2933 		}
2934 
2935 		ns_list->ns_list[count++] = ns->opts.nsid;
2936 		if (count == SPDK_COUNTOF(ns_list->ns_list)) {
2937 			break;
2938 		}
2939 	}
2940 
2941 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2942 }
2943 
2944 static void
2945 _add_ns_id_desc(void **buf_ptr, size_t *buf_remain,
2946 		enum spdk_nvme_nidt type,
2947 		const void *data, size_t data_size)
2948 {
2949 	struct spdk_nvme_ns_id_desc *desc;
2950 	size_t desc_size = sizeof(*desc) + data_size;
2951 
2952 	/*
2953 	 * These should never fail in practice, since all valid NS ID descriptors
2954 	 * should be defined so that they fit in the available 4096-byte buffer.
2955 	 */
2956 	assert(data_size > 0);
2957 	assert(data_size <= UINT8_MAX);
2958 	assert(desc_size < *buf_remain);
2959 	if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) {
2960 		return;
2961 	}
2962 
2963 	desc = *buf_ptr;
2964 	desc->nidt = type;
2965 	desc->nidl = data_size;
2966 	memcpy(desc->nid, data, data_size);
2967 
2968 	*buf_ptr += desc_size;
2969 	*buf_remain -= desc_size;
2970 }
2971 
2972 static int
2973 nvmf_ctrlr_identify_ns_id_descriptor_list(
2974 	struct spdk_nvmf_subsystem *subsystem,
2975 	struct spdk_nvme_cmd *cmd,
2976 	struct spdk_nvme_cpl *rsp,
2977 	void *id_desc_list, size_t id_desc_list_size)
2978 {
2979 	struct spdk_nvmf_ns *ns;
2980 	size_t buf_remain = id_desc_list_size;
2981 	void *buf_ptr = id_desc_list;
2982 
2983 	ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
2984 	if (ns == NULL || ns->bdev == NULL) {
2985 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2986 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2987 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2988 	}
2989 
2990 #define ADD_ID_DESC(type, data, size) \
2991 	do { \
2992 		if (!spdk_mem_all_zero(data, size)) { \
2993 			_add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \
2994 		} \
2995 	} while (0)
2996 
2997 	ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64));
2998 	ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid));
2999 	ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid));
3000 	ADD_ID_DESC(SPDK_NVME_NIDT_CSI, &ns->csi, sizeof(uint8_t));
3001 
3002 	/*
3003 	 * The list is automatically 0-terminated, both in the temporary buffer
3004 	 * used by nvmf_ctrlr_identify(), and the eventual iov destination -
3005 	 * controller to host buffers in admin commands always get zeroed in
3006 	 * nvmf_ctrlr_process_admin_cmd().
3007 	 */
3008 
3009 #undef ADD_ID_DESC
3010 
3011 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3012 }
3013 
3014 static int
3015 nvmf_ctrlr_identify(struct spdk_nvmf_request *req)
3016 {
3017 	uint8_t cns;
3018 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3019 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3020 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3021 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
3022 	int ret = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3023 	char tmpbuf[SPDK_NVME_IDENTIFY_BUFLEN] = "";
3024 	struct spdk_iov_xfer ix;
3025 
3026 	if (req->iovcnt < 1 || req->length < SPDK_NVME_IDENTIFY_BUFLEN) {
3027 		SPDK_DEBUGLOG(nvmf, "identify command with invalid buffer\n");
3028 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3029 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3030 		return ret;
3031 	}
3032 
3033 	cns = cmd->cdw10_bits.identify.cns;
3034 
3035 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY &&
3036 	    cns != SPDK_NVME_IDENTIFY_CTRLR) {
3037 		/* Discovery controllers only support Identify Controller */
3038 		goto invalid_cns;
3039 	}
3040 
3041 	/*
3042 	 * We must use a temporary buffer: it's entirely possible the out buffer
3043 	 * is split across more than one IOV.
3044 	 */
3045 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
3046 
3047 	SPDK_DEBUGLOG(nvmf, "Received identify command with CNS 0x%02x\n", cns);
3048 
3049 	switch (cns) {
3050 	case SPDK_NVME_IDENTIFY_NS:
3051 		ret = spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, (void *)&tmpbuf);
3052 		break;
3053 	case SPDK_NVME_IDENTIFY_CTRLR:
3054 		ret = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, (void *)&tmpbuf);
3055 		break;
3056 	case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST:
3057 		ret = nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, (void *)&tmpbuf);
3058 		break;
3059 	case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST:
3060 		ret = nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp,
3061 				tmpbuf, req->length);
3062 		break;
3063 	case SPDK_NVME_IDENTIFY_NS_IOCS:
3064 		ret = spdk_nvmf_ns_identify_iocs_specific(ctrlr, cmd, rsp, (void *)&tmpbuf, req->length);
3065 		break;
3066 	case SPDK_NVME_IDENTIFY_CTRLR_IOCS:
3067 		ret = spdk_nvmf_ctrlr_identify_iocs_specific(ctrlr, cmd, rsp, (void *)&tmpbuf, req->length);
3068 		break;
3069 	default:
3070 		goto invalid_cns;
3071 	}
3072 
3073 	if (ret == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3074 		spdk_iov_xfer_from_buf(&ix, tmpbuf, sizeof(tmpbuf));
3075 	}
3076 
3077 	return ret;
3078 
3079 invalid_cns:
3080 	SPDK_DEBUGLOG(nvmf, "Identify command with unsupported CNS 0x%02x\n", cns);
3081 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3082 	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3083 	return ret;
3084 }
3085 
3086 static bool
3087 nvmf_qpair_abort_aer(struct spdk_nvmf_qpair *qpair, uint16_t cid)
3088 {
3089 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
3090 	struct spdk_nvmf_request *req;
3091 	int i;
3092 
3093 	if (!nvmf_qpair_is_admin_queue(qpair)) {
3094 		return false;
3095 	}
3096 
3097 	assert(spdk_get_thread() == ctrlr->thread);
3098 
3099 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3100 		if (ctrlr->aer_req[i]->cmd->nvme_cmd.cid == cid) {
3101 			SPDK_DEBUGLOG(nvmf, "Aborting AER request\n");
3102 			req = ctrlr->aer_req[i];
3103 			ctrlr->aer_req[i] = NULL;
3104 			ctrlr->nr_aer_reqs--;
3105 
3106 			/* Move the last req to the aborting position for making aer_reqs
3107 			 * in continuous
3108 			 */
3109 			if (i < ctrlr->nr_aer_reqs) {
3110 				ctrlr->aer_req[i] = ctrlr->aer_req[ctrlr->nr_aer_reqs];
3111 				ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
3112 			}
3113 
3114 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3115 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3116 			_nvmf_request_complete(req);
3117 			return true;
3118 		}
3119 	}
3120 
3121 	return false;
3122 }
3123 
3124 void
3125 nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair)
3126 {
3127 	struct spdk_nvmf_request *req, *tmp;
3128 
3129 	TAILQ_FOREACH_SAFE(req, &qpair->outstanding, link, tmp) {
3130 		if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
3131 			/* Zero-copy requests are kept on the outstanding queue from the moment
3132 			 * zcopy_start is sent until a zcopy_end callback is received.  Therefore,
3133 			 * we can't remove them from the outstanding queue here, but need to rely on
3134 			 * the transport to do a zcopy_end to release their buffers and, in turn,
3135 			 * remove them from the queue.
3136 			 */
3137 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3138 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3139 			nvmf_transport_req_free(req);
3140 		}
3141 	}
3142 }
3143 
3144 static void
3145 nvmf_qpair_abort_request(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_request *req)
3146 {
3147 	uint16_t cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
3148 
3149 	if (nvmf_qpair_abort_aer(qpair, cid)) {
3150 		SPDK_DEBUGLOG(nvmf, "abort ctrlr=%p sqid=%u cid=%u successful\n",
3151 			      qpair->ctrlr, qpair->qid, cid);
3152 		req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command successfully aborted */
3153 
3154 		spdk_nvmf_request_complete(req);
3155 		return;
3156 	}
3157 
3158 	nvmf_transport_qpair_abort_request(qpair, req);
3159 }
3160 
/*
 * Completion callback of the per-poll-group iteration started by
 * nvmf_ctrlr_abort(). The iteration context is the Abort request.
 */
static void
nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_nvmf_request *req;

	if (status != 0) {
		/*
		 * The iteration was stopped early because a poll group found
		 * the qpair; the request is not completed from here.
		 */
		return;
	}

	/*
	 * There was no qpair whose ID matches SQID of the abort command.
	 * Hence call _nvmf_request_complete() here.
	 */
	req = spdk_io_channel_iter_get_ctx(i);
	_nvmf_request_complete(req);
}
3173 
3174 static void
3175 nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i)
3176 {
3177 	struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
3178 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
3179 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
3180 	uint16_t sqid = req->cmd->nvme_cmd.cdw10_bits.abort.sqid;
3181 	struct spdk_nvmf_qpair *qpair;
3182 
3183 	TAILQ_FOREACH(qpair, &group->qpairs, link) {
3184 		if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) {
3185 			/* Found the qpair */
3186 
3187 			nvmf_qpair_abort_request(qpair, req);
3188 
3189 			/* Return -1 for the status so the iteration across threads stops. */
3190 			spdk_for_each_channel_continue(i, -1);
3191 			return;
3192 		}
3193 	}
3194 
3195 	spdk_for_each_channel_continue(i, 0);
3196 }
3197 
3198 static int
3199 nvmf_ctrlr_abort(struct spdk_nvmf_request *req)
3200 {
3201 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3202 
3203 	rsp->cdw0 = 1U; /* Command not aborted */
3204 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3205 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
3206 
3207 	/* Send a message to each poll group, searching for this ctrlr, sqid, and command. */
3208 	spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt,
3209 			      nvmf_ctrlr_abort_on_pg,
3210 			      req,
3211 			      nvmf_ctrlr_abort_done
3212 			     );
3213 
3214 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
3215 }
3216 
3217 int
3218 nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req)
3219 {
3220 	struct spdk_nvmf_request *req_to_abort = req->req_to_abort;
3221 	struct spdk_bdev *bdev;
3222 	struct spdk_bdev_desc *desc;
3223 	struct spdk_io_channel *ch;
3224 	int rc;
3225 
3226 	assert(req_to_abort != NULL);
3227 
3228 	if (g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr &&
3229 	    nvmf_qpair_is_admin_queue(req_to_abort->qpair)) {
3230 		return g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr(req);
3231 	}
3232 
3233 	rc = spdk_nvmf_request_get_bdev(req_to_abort->cmd->nvme_cmd.nsid, req_to_abort,
3234 					&bdev, &desc, &ch);
3235 	if (rc != 0) {
3236 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3237 	}
3238 
3239 	return spdk_nvmf_bdev_ctrlr_abort_cmd(bdev, desc, ch, req, req_to_abort);
3240 }
3241 
3242 static int
3243 get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0)
3244 {
3245 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3246 
3247 	rsp->cdw0 = cdw0;
3248 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3249 }
3250 
3251 /* we have to use the typedef in the function declaration to appease astyle. */
3252 typedef enum spdk_nvme_path_status_code spdk_nvme_path_status_code_t;
3253 
3254 static spdk_nvme_path_status_code_t
3255 _nvme_ana_state_to_path_status(enum spdk_nvme_ana_state ana_state)
3256 {
3257 	switch (ana_state) {
3258 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3259 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE;
3260 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3261 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3262 	case SPDK_NVME_ANA_CHANGE_STATE:
3263 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_TRANSITION;
3264 	default:
3265 		return SPDK_NVME_SC_INTERNAL_PATH_ERROR;
3266 	}
3267 }
3268 
3269 static int
3270 nvmf_ctrlr_get_features(struct spdk_nvmf_request *req)
3271 {
3272 	uint8_t feature;
3273 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3274 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3275 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3276 	enum spdk_nvme_ana_state ana_state;
3277 
3278 	feature = cmd->cdw10_bits.get_features.fid;
3279 
3280 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3281 		/*
3282 		 * Features supported by Discovery controller
3283 		 */
3284 		switch (feature) {
3285 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3286 			return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3287 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3288 			return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3289 		default:
3290 			SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3291 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3292 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3293 		}
3294 	}
3295 	/*
3296 	 * Process Get Features command for non-discovery controller
3297 	 */
3298 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3299 	switch (ana_state) {
3300 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3301 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3302 	case SPDK_NVME_ANA_CHANGE_STATE:
3303 		switch (feature) {
3304 		case SPDK_NVME_FEAT_ERROR_RECOVERY:
3305 		case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3306 		case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3307 		case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3308 			response->status.sct = SPDK_NVME_SCT_PATH;
3309 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3310 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3311 		default:
3312 			break;
3313 		}
3314 		break;
3315 	default:
3316 		break;
3317 	}
3318 
3319 	switch (feature) {
3320 	case SPDK_NVME_FEAT_ARBITRATION:
3321 		return get_features_generic(req, ctrlr->feat.arbitration.raw);
3322 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3323 		return get_features_generic(req, ctrlr->feat.power_management.raw);
3324 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3325 		return nvmf_ctrlr_get_features_temperature_threshold(req);
3326 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3327 		return get_features_generic(req, ctrlr->feat.error_recovery.raw);
3328 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3329 		return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw);
3330 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3331 		return get_features_generic(req, ctrlr->feat.number_of_queues.raw);
3332 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3333 		return get_features_generic(req, ctrlr->feat.interrupt_coalescing.raw);
3334 	case SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
3335 		return nvmf_ctrlr_get_features_interrupt_vector_configuration(req);
3336 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3337 		return get_features_generic(req, ctrlr->feat.write_atomicity.raw);
3338 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3339 		return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3340 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3341 		return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3342 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3343 		return nvmf_ctrlr_get_features_host_identifier(req);
3344 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3345 		return nvmf_ctrlr_get_features_reservation_notification_mask(req);
3346 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3347 		return nvmf_ctrlr_get_features_reservation_persistence(req);
3348 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3349 		return nvmf_ctrlr_get_features_host_behavior_support(req);
3350 	default:
3351 		SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3352 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3353 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3354 	}
3355 }
3356 
3357 static int
3358 nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
3359 {
3360 	uint8_t feature, save;
3361 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3362 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3363 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3364 	enum spdk_nvme_ana_state ana_state;
3365 	/*
3366 	 * Features are not saveable by the controller as indicated by
3367 	 * ONCS field of the Identify Controller data.
3368 	 * */
3369 	save = cmd->cdw10_bits.set_features.sv;
3370 	if (save) {
3371 		response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
3372 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3373 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3374 	}
3375 
3376 	feature = cmd->cdw10_bits.set_features.fid;
3377 
3378 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3379 		/*
3380 		 * Features supported by Discovery controller
3381 		 */
3382 		switch (feature) {
3383 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3384 			return nvmf_ctrlr_set_features_keep_alive_timer(req);
3385 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3386 			return nvmf_ctrlr_set_features_async_event_configuration(req);
3387 		default:
3388 			SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3389 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3390 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3391 		}
3392 	}
3393 	/*
3394 	 * Process Set Features command for non-discovery controller
3395 	 */
3396 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3397 	switch (ana_state) {
3398 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3399 	case SPDK_NVME_ANA_CHANGE_STATE:
3400 		if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
3401 			response->status.sct = SPDK_NVME_SCT_PATH;
3402 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3403 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3404 		} else {
3405 			switch (feature) {
3406 			case SPDK_NVME_FEAT_ERROR_RECOVERY:
3407 			case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3408 			case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3409 			case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3410 				response->status.sct = SPDK_NVME_SCT_PATH;
3411 				response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3412 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3413 			default:
3414 				break;
3415 			}
3416 		}
3417 		break;
3418 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3419 		response->status.sct = SPDK_NVME_SCT_PATH;
3420 		response->status.sc = SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3421 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3422 	default:
3423 		break;
3424 	}
3425 
3426 	switch (feature) {
3427 	case SPDK_NVME_FEAT_ARBITRATION:
3428 		return nvmf_ctrlr_set_features_arbitration(req);
3429 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3430 		return nvmf_ctrlr_set_features_power_management(req);
3431 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3432 		return nvmf_ctrlr_set_features_temperature_threshold(req);
3433 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3434 		return nvmf_ctrlr_set_features_error_recovery(req);
3435 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3436 		return nvmf_ctrlr_set_features_volatile_write_cache(req);
3437 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3438 		return nvmf_ctrlr_set_features_number_of_queues(req);
3439 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3440 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3441 		response->status.sc = SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE;
3442 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3443 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3444 		return nvmf_ctrlr_set_features_write_atomicity(req);
3445 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3446 		return nvmf_ctrlr_set_features_async_event_configuration(req);
3447 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3448 		return nvmf_ctrlr_set_features_keep_alive_timer(req);
3449 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3450 		return nvmf_ctrlr_set_features_host_identifier(req);
3451 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3452 		return nvmf_ctrlr_set_features_reservation_notification_mask(req);
3453 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3454 		return nvmf_ctrlr_set_features_reservation_persistence(req);
3455 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3456 		return nvmf_ctrlr_set_features_host_behavior_support(req);
3457 	default:
3458 		SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3459 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3460 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3461 	}
3462 }
3463 
3464 static int
3465 nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
3466 {
3467 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3468 
3469 	SPDK_DEBUGLOG(nvmf, "Keep Alive\n");
3470 	/*
3471 	 * To handle keep alive just clear or reset the
3472 	 * ctrlr based keep alive duration counter.
3473 	 * When added, a separate timer based process
3474 	 * will monitor if the time since last recorded
3475 	 * keep alive has exceeded the max duration and
3476 	 * take appropriate action.
3477 	 */
3478 	ctrlr->last_keep_alive_tick = spdk_get_ticks();
3479 
3480 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3481 }
3482 
3483 int
3484 nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
3485 {
3486 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3487 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3488 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3489 	struct spdk_nvmf_subsystem_poll_group *sgroup;
3490 	int rc;
3491 
3492 	if (cmd->opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
3493 		/* We do not want to treat AERs as outstanding commands,
3494 		 * so decrement mgmt_io_outstanding here to offset
3495 		 * the increment that happened prior to this call.
3496 		 */
3497 		sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
3498 		assert(sgroup != NULL);
3499 		sgroup->mgmt_io_outstanding--;
3500 	}
3501 
3502 	if (ctrlr == NULL) {
3503 		SPDK_ERRLOG("Admin command sent before CONNECT\n");
3504 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3505 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3506 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3507 	}
3508 
3509 	assert(spdk_get_thread() == ctrlr->thread);
3510 
3511 	if (cmd->fuse != 0) {
3512 		/* Fused admin commands are not supported. */
3513 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3514 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3515 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3516 	}
3517 
3518 	if (ctrlr->vcprop.cc.bits.en != 1) {
3519 		SPDK_ERRLOG("Admin command sent to disabled controller\n");
3520 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3521 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3522 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3523 	}
3524 
3525 	if (req->iovcnt && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
3526 		spdk_iov_memset(req->iov, req->iovcnt, 0);
3527 	}
3528 
3529 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3530 		/* Discovery controllers only support these admin OPS. */
3531 		switch (cmd->opc) {
3532 		case SPDK_NVME_OPC_IDENTIFY:
3533 		case SPDK_NVME_OPC_GET_LOG_PAGE:
3534 		case SPDK_NVME_OPC_KEEP_ALIVE:
3535 		case SPDK_NVME_OPC_SET_FEATURES:
3536 		case SPDK_NVME_OPC_GET_FEATURES:
3537 		case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3538 			break;
3539 		default:
3540 			goto invalid_opcode;
3541 		}
3542 	}
3543 
3544 	/* Call a custom adm cmd handler if set. Aborts are handled in a different path (see nvmf_passthru_admin_cmd) */
3545 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) {
3546 		rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req);
3547 		if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3548 			/* The handler took care of this command */
3549 			return rc;
3550 		}
3551 	}
3552 
3553 	switch (cmd->opc) {
3554 	case SPDK_NVME_OPC_GET_LOG_PAGE:
3555 		return nvmf_ctrlr_get_log_page(req);
3556 	case SPDK_NVME_OPC_IDENTIFY:
3557 		return nvmf_ctrlr_identify(req);
3558 	case SPDK_NVME_OPC_ABORT:
3559 		return nvmf_ctrlr_abort(req);
3560 	case SPDK_NVME_OPC_GET_FEATURES:
3561 		return nvmf_ctrlr_get_features(req);
3562 	case SPDK_NVME_OPC_SET_FEATURES:
3563 		return nvmf_ctrlr_set_features(req);
3564 	case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3565 		return nvmf_ctrlr_async_event_request(req);
3566 	case SPDK_NVME_OPC_KEEP_ALIVE:
3567 		return nvmf_ctrlr_keep_alive(req);
3568 
3569 	case SPDK_NVME_OPC_CREATE_IO_SQ:
3570 	case SPDK_NVME_OPC_CREATE_IO_CQ:
3571 	case SPDK_NVME_OPC_DELETE_IO_SQ:
3572 	case SPDK_NVME_OPC_DELETE_IO_CQ:
3573 		/* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
3574 		goto invalid_opcode;
3575 
3576 	default:
3577 		goto invalid_opcode;
3578 	}
3579 
3580 invalid_opcode:
3581 	SPDK_INFOLOG(nvmf, "Unsupported admin opcode 0x%x\n", cmd->opc);
3582 	response->status.sct = SPDK_NVME_SCT_GENERIC;
3583 	response->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3584 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3585 }
3586 
3587 static int
3588 nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
3589 {
3590 	struct spdk_nvmf_qpair *qpair = req->qpair;
3591 	struct spdk_nvmf_capsule_cmd *cap_hdr;
3592 
3593 	cap_hdr = &req->cmd->nvmf_cmd;
3594 
3595 	if (qpair->ctrlr == NULL) {
3596 		/* No ctrlr established yet; the only valid command is Connect */
3597 		if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
3598 			return nvmf_ctrlr_cmd_connect(req);
3599 		} else {
3600 			SPDK_DEBUGLOG(nvmf, "Got fctype 0x%x, expected Connect\n",
3601 				      cap_hdr->fctype);
3602 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3603 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3604 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3605 		}
3606 	} else if (nvmf_qpair_is_admin_queue(qpair)) {
3607 		/*
3608 		 * Controller session is established, and this is an admin queue.
3609 		 * Disallow Connect and allow other fabrics commands.
3610 		 */
3611 		switch (cap_hdr->fctype) {
3612 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
3613 			return nvmf_property_set(req);
3614 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
3615 			return nvmf_property_get(req);
3616 		default:
3617 			SPDK_DEBUGLOG(nvmf, "unknown fctype 0x%02x\n",
3618 				      cap_hdr->fctype);
3619 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3620 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3621 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3622 		}
3623 	} else {
3624 		/* Controller session is established, and this is an I/O queue */
3625 		/* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
3626 		SPDK_DEBUGLOG(nvmf, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype);
3627 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3628 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3629 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3630 	}
3631 }
3632 
3633 static inline void
3634 nvmf_ctrlr_queue_pending_async_event(struct spdk_nvmf_ctrlr *ctrlr,
3635 				     union spdk_nvme_async_event_completion *event)
3636 {
3637 	struct spdk_nvmf_async_event_completion *nvmf_event;
3638 
3639 	nvmf_event = calloc(1, sizeof(*nvmf_event));
3640 	if (!nvmf_event) {
3641 		SPDK_ERRLOG("Alloc nvmf event failed, ignore the event\n");
3642 		return;
3643 	}
3644 	nvmf_event->event.raw = event->raw;
3645 	STAILQ_INSERT_TAIL(&ctrlr->async_events, nvmf_event, link);
3646 }
3647 
3648 static inline int
3649 nvmf_ctrlr_async_event_notification(struct spdk_nvmf_ctrlr *ctrlr,
3650 				    union spdk_nvme_async_event_completion *event)
3651 {
3652 	struct spdk_nvmf_request *req;
3653 	struct spdk_nvme_cpl *rsp;
3654 
3655 	assert(spdk_get_thread() == ctrlr->thread);
3656 
3657 	/* If there is no outstanding AER request, queue the event.  Then
3658 	 * if an AER is later submitted, this event can be sent as a
3659 	 * response.
3660 	 */
3661 	if (ctrlr->nr_aer_reqs == 0) {
3662 		nvmf_ctrlr_queue_pending_async_event(ctrlr, event);
3663 		return 0;
3664 	}
3665 
3666 	req = ctrlr->aer_req[--ctrlr->nr_aer_reqs];
3667 	rsp = &req->rsp->nvme_cpl;
3668 
3669 	rsp->cdw0 = event->raw;
3670 
3671 	_nvmf_request_complete(req);
3672 	ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
3673 
3674 	return 0;
3675 }
3676 
3677 int
3678 nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
3679 {
3680 	union spdk_nvme_async_event_completion event = {0};
3681 
3682 	/* Users may disable the event notification */
3683 	if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) {
3684 		return 0;
3685 	}
3686 
3687 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT)) {
3688 		return 0;
3689 	}
3690 
3691 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3692 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED;
3693 	event.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST;
3694 
3695 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3696 }
3697 
3698 int
3699 nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr)
3700 {
3701 	union spdk_nvme_async_event_completion event = {0};
3702 
3703 	/* Users may disable the event notification */
3704 	if (!ctrlr->feat.async_event_configuration.bits.ana_change_notice) {
3705 		return 0;
3706 	}
3707 
3708 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT)) {
3709 		return 0;
3710 	}
3711 
3712 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3713 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_ANA_CHANGE;
3714 	event.bits.log_page_identifier = SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS;
3715 
3716 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3717 }
3718 
3719 void
3720 nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr)
3721 {
3722 	union spdk_nvme_async_event_completion event = {0};
3723 
3724 	if (!ctrlr->num_avail_log_pages) {
3725 		return;
3726 	}
3727 
3728 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT)) {
3729 		return;
3730 	}
3731 
3732 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_IO;
3733 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL;
3734 	event.bits.log_page_identifier = SPDK_NVME_LOG_RESERVATION_NOTIFICATION;
3735 
3736 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3737 }
3738 
3739 void
3740 nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx)
3741 {
3742 	union spdk_nvme_async_event_completion event = {0};
3743 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
3744 
3745 	/* Users may disable the event notification manually or
3746 	 * it may not be enabled due to keep alive timeout
3747 	 * not being set in connect command to discovery controller.
3748 	 */
3749 	if (!ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice) {
3750 		return;
3751 	}
3752 
3753 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT)) {
3754 		return;
3755 	}
3756 
3757 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3758 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE;
3759 	event.bits.log_page_identifier = SPDK_NVME_LOG_DISCOVERY;
3760 
3761 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3762 }
3763 
3764 int
3765 nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
3766 				   union spdk_nvme_async_event_completion event)
3767 {
3768 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT)) {
3769 		return 0;
3770 	}
3771 
3772 	if (event.bits.async_event_type != SPDK_NVME_ASYNC_EVENT_TYPE_ERROR ||
3773 	    event.bits.async_event_info > SPDK_NVME_ASYNC_EVENT_FW_IMAGE_LOAD) {
3774 		return 0;
3775 	}
3776 
3777 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3778 }
3779 
3780 void
3781 nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair)
3782 {
3783 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
3784 	int i;
3785 
3786 	if (!nvmf_qpair_is_admin_queue(qpair)) {
3787 		return;
3788 	}
3789 
3790 	assert(spdk_get_thread() == ctrlr->thread);
3791 
3792 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3793 		spdk_nvmf_request_free(ctrlr->aer_req[i]);
3794 		ctrlr->aer_req[i] = NULL;
3795 	}
3796 
3797 	ctrlr->nr_aer_reqs = 0;
3798 }
3799 
3800 void
3801 nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr)
3802 {
3803 	struct spdk_nvmf_request *req;
3804 	int i;
3805 
3806 	assert(spdk_get_thread() == ctrlr->thread);
3807 
3808 	if (!ctrlr->nr_aer_reqs) {
3809 		return;
3810 	}
3811 
3812 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3813 		req = ctrlr->aer_req[i];
3814 
3815 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3816 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3817 		_nvmf_request_complete(req);
3818 
3819 		ctrlr->aer_req[i] = NULL;
3820 	}
3821 
3822 	ctrlr->nr_aer_reqs = 0;
3823 }
3824 
3825 static void
3826 _nvmf_ctrlr_add_reservation_log(void *ctx)
3827 {
3828 	struct spdk_nvmf_reservation_log *log = (struct spdk_nvmf_reservation_log *)ctx;
3829 	struct spdk_nvmf_ctrlr *ctrlr = log->ctrlr;
3830 
3831 	ctrlr->log_page_count++;
3832 
3833 	/* Maximum number of queued log pages is 255 */
3834 	if (ctrlr->num_avail_log_pages == 0xff) {
3835 		struct spdk_nvmf_reservation_log *entry;
3836 		entry = TAILQ_LAST(&ctrlr->log_head, log_page_head);
3837 		entry->log.log_page_count = ctrlr->log_page_count;
3838 		free(log);
3839 		return;
3840 	}
3841 
3842 	log->log.log_page_count = ctrlr->log_page_count;
3843 	log->log.num_avail_log_pages = ctrlr->num_avail_log_pages++;
3844 	TAILQ_INSERT_TAIL(&ctrlr->log_head, log, link);
3845 
3846 	nvmf_ctrlr_async_event_reservation_notification(ctrlr);
3847 }
3848 
3849 void
3850 nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
3851 				  struct spdk_nvmf_ns *ns,
3852 				  enum spdk_nvme_reservation_notification_log_page_type type)
3853 {
3854 	struct spdk_nvmf_reservation_log *log;
3855 
3856 	switch (type) {
3857 	case SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY:
3858 		return;
3859 	case SPDK_NVME_REGISTRATION_PREEMPTED:
3860 		if (ns->mask & SPDK_NVME_REGISTRATION_PREEMPTED_MASK) {
3861 			return;
3862 		}
3863 		break;
3864 	case SPDK_NVME_RESERVATION_RELEASED:
3865 		if (ns->mask & SPDK_NVME_RESERVATION_RELEASED_MASK) {
3866 			return;
3867 		}
3868 		break;
3869 	case SPDK_NVME_RESERVATION_PREEMPTED:
3870 		if (ns->mask & SPDK_NVME_RESERVATION_PREEMPTED_MASK) {
3871 			return;
3872 		}
3873 		break;
3874 	default:
3875 		return;
3876 	}
3877 
3878 	log = calloc(1, sizeof(*log));
3879 	if (!log) {
3880 		SPDK_ERRLOG("Alloc log page failed, ignore the log\n");
3881 		return;
3882 	}
3883 	log->ctrlr = ctrlr;
3884 	log->log.type = type;
3885 	log->log.nsid = ns->nsid;
3886 
3887 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_reservation_log, log);
3888 }
3889 
3890 /* Check from subsystem poll group's namespace information data structure */
3891 static bool
3892 nvmf_ns_info_ctrlr_is_registrant(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3893 				 struct spdk_nvmf_ctrlr *ctrlr)
3894 {
3895 	uint32_t i;
3896 
3897 	for (i = 0; i < SPDK_NVMF_MAX_NUM_REGISTRANTS; i++) {
3898 		if (!spdk_uuid_compare(&ns_info->reg_hostid[i], &ctrlr->hostid)) {
3899 			return true;
3900 		}
3901 	}
3902 
3903 	return false;
3904 }
3905 
3906 /*
3907  * Check the NVMe command is permitted or not for current controller(Host).
3908  */
3909 static int
3910 nvmf_ns_reservation_request_check(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3911 				  struct spdk_nvmf_ctrlr *ctrlr,
3912 				  struct spdk_nvmf_request *req)
3913 {
3914 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3915 	enum spdk_nvme_reservation_type rtype = ns_info->rtype;
3916 	uint8_t status = SPDK_NVME_SC_SUCCESS;
3917 	uint8_t racqa;
3918 	bool is_registrant;
3919 
3920 	/* No valid reservation */
3921 	if (!rtype) {
3922 		return 0;
3923 	}
3924 
3925 	is_registrant = nvmf_ns_info_ctrlr_is_registrant(ns_info, ctrlr);
3926 	/* All registrants type and current ctrlr is a valid registrant */
3927 	if ((rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
3928 	     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && is_registrant) {
3929 		return 0;
3930 	} else if (!spdk_uuid_compare(&ns_info->holder_id, &ctrlr->hostid)) {
3931 		return 0;
3932 	}
3933 
3934 	/* Non-holder for current controller */
3935 	switch (cmd->opc) {
3936 	case SPDK_NVME_OPC_READ:
3937 	case SPDK_NVME_OPC_COMPARE:
3938 		if (rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3939 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3940 			goto exit;
3941 		}
3942 		if ((rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY ||
3943 		     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && !is_registrant) {
3944 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3945 		}
3946 		break;
3947 	case SPDK_NVME_OPC_FLUSH:
3948 	case SPDK_NVME_OPC_WRITE:
3949 	case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
3950 	case SPDK_NVME_OPC_WRITE_ZEROES:
3951 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
3952 		if (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE ||
3953 		    rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3954 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3955 			goto exit;
3956 		}
3957 		if (!is_registrant) {
3958 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3959 		}
3960 		break;
3961 	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
3962 		racqa = cmd->cdw10_bits.resv_acquire.racqa;
3963 		if (racqa == SPDK_NVME_RESERVE_ACQUIRE) {
3964 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3965 			goto exit;
3966 		}
3967 		if (!is_registrant) {
3968 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3969 		}
3970 		break;
3971 	case SPDK_NVME_OPC_RESERVATION_RELEASE:
3972 		if (!is_registrant) {
3973 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3974 		}
3975 		break;
3976 	default:
3977 		break;
3978 	}
3979 
3980 exit:
3981 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3982 	req->rsp->nvme_cpl.status.sc = status;
3983 	if (status == SPDK_NVME_SC_RESERVATION_CONFLICT) {
3984 		return -EPERM;
3985 	}
3986 
3987 	return 0;
3988 }
3989 
3990 static int
3991 nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
3992 				struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
3993 {
3994 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3995 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3996 	struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req;
3997 	int rc;
3998 
3999 	if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) {
4000 		/* first fused operation (should be compare) */
4001 		if (first_fused_req != NULL) {
4002 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4003 
4004 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
4005 
4006 			/* abort req->qpair->first_fused_request and continue with new fused command */
4007 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4008 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4009 			_nvmf_request_complete(first_fused_req);
4010 		} else if (cmd->opc != SPDK_NVME_OPC_COMPARE) {
4011 			SPDK_ERRLOG("Wrong op code of fused operations\n");
4012 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4013 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
4014 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4015 		}
4016 
4017 		req->qpair->first_fused_req = req;
4018 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
4019 	} else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) {
4020 		/* second fused operation (should be write) */
4021 		if (first_fused_req == NULL) {
4022 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
4023 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4024 			rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4025 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4026 		} else if (cmd->opc != SPDK_NVME_OPC_WRITE) {
4027 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4028 
4029 			SPDK_ERRLOG("Wrong op code of fused operations\n");
4030 
4031 			/* abort req->qpair->first_fused_request and fail current command */
4032 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4033 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4034 			_nvmf_request_complete(first_fused_req);
4035 
4036 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4037 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
4038 			req->qpair->first_fused_req = NULL;
4039 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4040 		}
4041 
4042 		/* save request of first command to generate response later */
4043 		req->first_fused_req = first_fused_req;
4044 		req->qpair->first_fused_req = NULL;
4045 	} else {
4046 		SPDK_ERRLOG("Invalid fused command fuse field.\n");
4047 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4048 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
4049 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4050 	}
4051 
4052 	rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req);
4053 
4054 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4055 		if (spdk_nvme_cpl_is_error(rsp)) {
4056 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
4057 
4058 			fused_response->status = rsp->status;
4059 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
4060 			rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
4061 			/* Complete first of fused commands. Second will be completed by upper layer */
4062 			_nvmf_request_complete(first_fused_req);
4063 			req->first_fused_req = NULL;
4064 		}
4065 	}
4066 
4067 	return rc;
4068 }
4069 
4070 bool
4071 nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req)
4072 {
4073 	struct spdk_nvmf_transport *transport = req->qpair->transport;
4074 	struct spdk_nvmf_ns *ns;
4075 
4076 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE);
4077 
4078 	if (!transport->opts.zcopy) {
4079 		return false;
4080 	}
4081 
4082 	if (nvmf_qpair_is_admin_queue(req->qpair)) {
4083 		/* Admin queue */
4084 		return false;
4085 	}
4086 
4087 	if ((req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_WRITE) &&
4088 	    (req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_READ)) {
4089 		/* Not a READ or WRITE command */
4090 		return false;
4091 	}
4092 
4093 	if (req->cmd->nvme_cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
4094 		/* Fused commands dont use zcopy buffers */
4095 		return false;
4096 	}
4097 
4098 	ns = _nvmf_subsystem_get_ns(req->qpair->ctrlr->subsys, req->cmd->nvme_cmd.nsid);
4099 	if (ns == NULL || ns->bdev == NULL || !ns->zcopy) {
4100 		return false;
4101 	}
4102 
4103 	req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT;
4104 	return true;
4105 }
4106 
4107 void
4108 spdk_nvmf_request_zcopy_start(struct spdk_nvmf_request *req)
4109 {
4110 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
4111 
4112 	/* Set iovcnt to be the maximum number of iovs that the ZCOPY can use */
4113 	req->iovcnt = NVMF_REQ_MAX_BUFFERS;
4114 
4115 	spdk_nvmf_request_exec(req);
4116 }
4117 
4118 void
4119 spdk_nvmf_request_zcopy_end(struct spdk_nvmf_request *req, bool commit)
4120 {
4121 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE);
4122 	req->zcopy_phase = NVMF_ZCOPY_PHASE_END_PENDING;
4123 
4124 	nvmf_bdev_ctrlr_zcopy_end(req, commit);
4125 }
4126 
4127 int
4128 nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
4129 {
4130 	uint32_t nsid;
4131 	struct spdk_nvmf_ns *ns;
4132 	struct spdk_bdev *bdev;
4133 	struct spdk_bdev_desc *desc;
4134 	struct spdk_io_channel *ch;
4135 	struct spdk_nvmf_poll_group *group = req->qpair->group;
4136 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
4137 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
4138 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
4139 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4140 	enum spdk_nvme_ana_state ana_state;
4141 
4142 	/* pre-set response details for this command */
4143 	response->status.sc = SPDK_NVME_SC_SUCCESS;
4144 	nsid = cmd->nsid;
4145 
4146 	if (spdk_unlikely(ctrlr == NULL)) {
4147 		SPDK_ERRLOG("I/O command sent before CONNECT\n");
4148 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4149 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4150 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4151 	}
4152 
4153 	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
4154 		SPDK_ERRLOG("I/O command sent to disabled controller\n");
4155 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4156 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4157 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4158 	}
4159 
4160 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
4161 	if (ns == NULL || ns->bdev == NULL) {
4162 		SPDK_DEBUGLOG(nvmf, "Unsuccessful query for nsid %u\n", cmd->nsid);
4163 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4164 		response->status.dnr = 1;
4165 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4166 	}
4167 
4168 	ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
4169 	if (spdk_unlikely(ana_state != SPDK_NVME_ANA_OPTIMIZED_STATE &&
4170 			  ana_state != SPDK_NVME_ANA_NON_OPTIMIZED_STATE)) {
4171 		SPDK_DEBUGLOG(nvmf, "Fail I/O command due to ANA state %d\n",
4172 			      ana_state);
4173 		response->status.sct = SPDK_NVME_SCT_PATH;
4174 		response->status.sc = _nvme_ana_state_to_path_status(ana_state);
4175 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4176 	}
4177 
4178 	if (spdk_likely(ctrlr->listener != NULL)) {
4179 		SPDK_DTRACE_PROBE3(nvmf_request_io_exec_path, req,
4180 				   ctrlr->listener->trid->traddr,
4181 				   ctrlr->listener->trid->trsvcid);
4182 	}
4183 
4184 	/* scan-build falsely reporting dereference of null pointer */
4185 	assert(group != NULL && group->sgroups != NULL);
4186 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
4187 	if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
4188 		SPDK_DEBUGLOG(nvmf, "Reservation Conflict for nsid %u, opcode %u\n",
4189 			      cmd->nsid, cmd->opc);
4190 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4191 	}
4192 
4193 	bdev = ns->bdev;
4194 	desc = ns->desc;
4195 	ch = ns_info->channel;
4196 
4197 	if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) {
4198 		return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch);
4199 	} else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) {
4200 		struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl;
4201 
4202 		SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n");
4203 
4204 		/* abort req->qpair->first_fused_request and continue with new command */
4205 		fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4206 		fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4207 		_nvmf_request_complete(req->qpair->first_fused_req);
4208 		req->qpair->first_fused_req = NULL;
4209 	}
4210 
4211 	if (spdk_nvmf_request_using_zcopy(req)) {
4212 		assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
4213 		return nvmf_bdev_ctrlr_zcopy_start(bdev, desc, ch, req);
4214 	} else {
4215 		switch (cmd->opc) {
4216 		case SPDK_NVME_OPC_READ:
4217 			return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
4218 		case SPDK_NVME_OPC_WRITE:
4219 			return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
4220 		case SPDK_NVME_OPC_COMPARE:
4221 			return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req);
4222 		case SPDK_NVME_OPC_WRITE_ZEROES:
4223 			return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
4224 		case SPDK_NVME_OPC_FLUSH:
4225 			return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
4226 		case SPDK_NVME_OPC_DATASET_MANAGEMENT:
4227 			return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
4228 		case SPDK_NVME_OPC_RESERVATION_REGISTER:
4229 		case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
4230 		case SPDK_NVME_OPC_RESERVATION_RELEASE:
4231 		case SPDK_NVME_OPC_RESERVATION_REPORT:
4232 			spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req);
4233 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
4234 		case SPDK_NVME_OPC_COPY:
4235 			return nvmf_bdev_ctrlr_copy_cmd(bdev, desc, ch, req);
4236 		default:
4237 			return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
4238 		}
4239 	}
4240 }
4241 
4242 static void
4243 nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair)
4244 {
4245 	if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) {
4246 		assert(qpair->state_cb != NULL);
4247 
4248 		if (TAILQ_EMPTY(&qpair->outstanding)) {
4249 			qpair->state_cb(qpair->state_cb_arg, 0);
4250 		}
4251 	}
4252 }
4253 
4254 int
4255 spdk_nvmf_request_free(struct spdk_nvmf_request *req)
4256 {
4257 	struct spdk_nvmf_qpair *qpair = req->qpair;
4258 
4259 	TAILQ_REMOVE(&qpair->outstanding, req, link);
4260 	if (nvmf_transport_req_free(req)) {
4261 		SPDK_ERRLOG("Unable to free transport level request resources.\n");
4262 	}
4263 
4264 	nvmf_qpair_request_cleanup(qpair);
4265 
4266 	return 0;
4267 }
4268 
4269 static void
4270 _nvmf_request_complete(void *ctx)
4271 {
4272 	struct spdk_nvmf_request *req = ctx;
4273 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
4274 	struct spdk_nvmf_qpair *qpair;
4275 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4276 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4277 	bool is_aer = false;
4278 	uint32_t nsid;
4279 	bool paused;
4280 	uint8_t opcode;
4281 
4282 	rsp->sqid = 0;
4283 	rsp->status.p = 0;
4284 	rsp->cid = req->cmd->nvme_cmd.cid;
4285 	nsid = req->cmd->nvme_cmd.nsid;
4286 	opcode = req->cmd->nvmf_cmd.opcode;
4287 
4288 	qpair = req->qpair;
4289 	if (qpair->ctrlr) {
4290 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4291 		assert(sgroup != NULL);
4292 		is_aer = req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
4293 		if (spdk_likely(qpair->qid != 0)) {
4294 			qpair->group->stat.completed_nvme_io++;
4295 		}
4296 
4297 		/*
4298 		 * Set the crd value.
4299 		 * If the the IO has any error, and dnr (DoNotRetry) is not 1,
4300 		 * and ACRE is enabled, we will set the crd to 1 to select the first CRDT.
4301 		 */
4302 		if (spdk_nvme_cpl_is_error(rsp) &&
4303 		    rsp->status.dnr == 0 &&
4304 		    qpair->ctrlr->acre_enabled) {
4305 			rsp->status.crd = 1;
4306 		}
4307 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4308 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4309 	}
4310 
4311 	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
4312 		spdk_nvme_print_completion(qpair->qid, rsp);
4313 	}
4314 
4315 	switch (req->zcopy_phase) {
4316 	case NVMF_ZCOPY_PHASE_NONE:
4317 		TAILQ_REMOVE(&qpair->outstanding, req, link);
4318 		break;
4319 	case NVMF_ZCOPY_PHASE_INIT:
4320 		if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
4321 			req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT_FAILED;
4322 			TAILQ_REMOVE(&qpair->outstanding, req, link);
4323 		} else {
4324 			req->zcopy_phase = NVMF_ZCOPY_PHASE_EXECUTE;
4325 		}
4326 		break;
4327 	case NVMF_ZCOPY_PHASE_EXECUTE:
4328 		break;
4329 	case NVMF_ZCOPY_PHASE_END_PENDING:
4330 		TAILQ_REMOVE(&qpair->outstanding, req, link);
4331 		req->zcopy_phase = NVMF_ZCOPY_PHASE_COMPLETE;
4332 		break;
4333 	default:
4334 		SPDK_ERRLOG("Invalid ZCOPY phase %u\n", req->zcopy_phase);
4335 		break;
4336 	}
4337 
4338 	if (nvmf_transport_req_complete(req)) {
4339 		SPDK_ERRLOG("Transport request completion error!\n");
4340 	}
4341 
4342 	/* AER cmd is an exception */
4343 	if (sgroup && !is_aer) {
4344 		if (spdk_unlikely(opcode == SPDK_NVME_OPC_FABRIC ||
4345 				  nvmf_qpair_is_admin_queue(qpair))) {
4346 			assert(sgroup->mgmt_io_outstanding > 0);
4347 			sgroup->mgmt_io_outstanding--;
4348 		} else {
4349 			if (req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE ||
4350 			    req->zcopy_phase == NVMF_ZCOPY_PHASE_COMPLETE ||
4351 			    req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT_FAILED) {
4352 				/* End of request */
4353 
4354 				/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
4355 				if (spdk_likely(nsid - 1 < sgroup->num_ns)) {
4356 					sgroup->ns_info[nsid - 1].io_outstanding--;
4357 				}
4358 			}
4359 		}
4360 
4361 		if (spdk_unlikely(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
4362 				  sgroup->mgmt_io_outstanding == 0)) {
4363 			paused = true;
4364 			for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
4365 				ns_info = &sgroup->ns_info[nsid];
4366 
4367 				if (ns_info->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
4368 				    ns_info->io_outstanding > 0) {
4369 					paused = false;
4370 					break;
4371 				}
4372 			}
4373 
4374 			if (paused) {
4375 				sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
4376 				sgroup->cb_fn(sgroup->cb_arg, 0);
4377 				sgroup->cb_fn = NULL;
4378 				sgroup->cb_arg = NULL;
4379 			}
4380 		}
4381 
4382 	}
4383 
4384 	nvmf_qpair_request_cleanup(qpair);
4385 }
4386 
4387 int
4388 spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
4389 {
4390 	struct spdk_nvmf_qpair *qpair = req->qpair;
4391 
4392 	spdk_thread_exec_msg(qpair->group->thread, _nvmf_request_complete, req);
4393 
4394 	return 0;
4395 }
4396 
4397 void
4398 spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
4399 {
4400 	struct spdk_nvmf_qpair *qpair = req->qpair;
4401 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4402 	enum spdk_nvmf_request_exec_status status;
4403 
4404 	if (qpair->ctrlr) {
4405 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4406 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4407 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4408 	}
4409 
4410 	assert(sgroup != NULL);
4411 	sgroup->mgmt_io_outstanding++;
4412 
4413 	/* Place the request on the outstanding list so we can keep track of it */
4414 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4415 
4416 	assert(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC);
4417 	status = nvmf_ctrlr_process_fabrics_cmd(req);
4418 
4419 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4420 		_nvmf_request_complete(req);
4421 	}
4422 }
4423 
4424 static bool
4425 nvmf_check_subsystem_active(struct spdk_nvmf_request *req)
4426 {
4427 	struct spdk_nvmf_qpair *qpair = req->qpair;
4428 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4429 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4430 	uint32_t nsid;
4431 
4432 	if (qpair->ctrlr) {
4433 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4434 		assert(sgroup != NULL);
4435 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4436 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4437 	}
4438 
4439 	/* Check if the subsystem is paused (if there is a subsystem) */
4440 	if (sgroup != NULL) {
4441 		if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
4442 				  nvmf_qpair_is_admin_queue(qpair))) {
4443 			if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4444 				/* The subsystem is not currently active. Queue this request. */
4445 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4446 				return false;
4447 			}
4448 			sgroup->mgmt_io_outstanding++;
4449 		} else {
4450 			nsid = req->cmd->nvme_cmd.nsid;
4451 
4452 			/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
4453 			if (spdk_unlikely(nsid - 1 >= sgroup->num_ns)) {
4454 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4455 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4456 				req->rsp->nvme_cpl.status.dnr = 1;
4457 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4458 				_nvmf_request_complete(req);
4459 				return false;
4460 			}
4461 
4462 			ns_info = &sgroup->ns_info[nsid - 1];
4463 			if (ns_info->channel == NULL) {
4464 				/* This can can happen if host sends I/O to a namespace that is
4465 				 * in the process of being added, but before the full addition
4466 				 * process is complete.  Report invalid namespace in that case.
4467 				 */
4468 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4469 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4470 				req->rsp->nvme_cpl.status.dnr = 1;
4471 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4472 				ns_info->io_outstanding++;
4473 				_nvmf_request_complete(req);
4474 				return false;
4475 			}
4476 
4477 			if (ns_info->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4478 				/* The namespace is not currently active. Queue this request. */
4479 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4480 				return false;
4481 			}
4482 
4483 			ns_info->io_outstanding++;
4484 		}
4485 
4486 		if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
4487 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4488 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4489 			TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4490 			_nvmf_request_complete(req);
4491 			return false;
4492 		}
4493 	}
4494 
4495 	return true;
4496 }
4497 
4498 void
4499 spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
4500 {
4501 	struct spdk_nvmf_qpair *qpair = req->qpair;
4502 	struct spdk_nvmf_transport *transport = qpair->transport;
4503 	enum spdk_nvmf_request_exec_status status;
4504 
4505 	if (req->data != NULL) {
4506 		assert(req->iovcnt > 0);
4507 	}
4508 
4509 	if (!nvmf_check_subsystem_active(req)) {
4510 		return;
4511 	}
4512 
4513 	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
4514 		spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
4515 	}
4516 
4517 	/* Place the request on the outstanding list so we can keep track of it */
4518 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4519 
4520 	if (spdk_unlikely((req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC) &&
4521 			  spdk_nvme_trtype_is_fabrics(transport->ops->type))) {
4522 		status = nvmf_ctrlr_process_fabrics_cmd(req);
4523 	} else if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4524 		status = nvmf_ctrlr_process_admin_cmd(req);
4525 	} else {
4526 		status = nvmf_ctrlr_process_io_cmd(req);
4527 	}
4528 
4529 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4530 		_nvmf_request_complete(req);
4531 	}
4532 }
4533 
4534 static bool
4535 nvmf_ctrlr_get_dif_ctx(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
4536 		       struct spdk_dif_ctx *dif_ctx)
4537 {
4538 	struct spdk_nvmf_ns *ns;
4539 	struct spdk_bdev *bdev;
4540 
4541 	if (ctrlr == NULL || cmd == NULL) {
4542 		return false;
4543 	}
4544 
4545 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
4546 	if (ns == NULL || ns->bdev == NULL) {
4547 		return false;
4548 	}
4549 
4550 	bdev = ns->bdev;
4551 
4552 	switch (cmd->opc) {
4553 	case SPDK_NVME_OPC_READ:
4554 	case SPDK_NVME_OPC_WRITE:
4555 	case SPDK_NVME_OPC_COMPARE:
4556 		return nvmf_bdev_ctrlr_get_dif_ctx(bdev, cmd, dif_ctx);
4557 	default:
4558 		break;
4559 	}
4560 
4561 	return false;
4562 }
4563 
4564 bool
4565 spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx)
4566 {
4567 	struct spdk_nvmf_qpair *qpair = req->qpair;
4568 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
4569 
4570 	if (spdk_likely(ctrlr == NULL || !ctrlr->dif_insert_or_strip)) {
4571 		return false;
4572 	}
4573 
4574 	if (spdk_unlikely(qpair->state != SPDK_NVMF_QPAIR_ACTIVE)) {
4575 		return false;
4576 	}
4577 
4578 	if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
4579 		return false;
4580 	}
4581 
4582 	if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4583 		return false;
4584 	}
4585 
4586 	return nvmf_ctrlr_get_dif_ctx(ctrlr, &req->cmd->nvme_cmd, dif_ctx);
4587 }
4588 
4589 void
4590 spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr)
4591 {
4592 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = hdlr;
4593 }
4594 
4595 static int
4596 nvmf_passthru_admin_cmd(struct spdk_nvmf_request *req)
4597 {
4598 	struct spdk_bdev *bdev;
4599 	struct spdk_bdev_desc *desc;
4600 	struct spdk_io_channel *ch;
4601 	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
4602 	struct spdk_nvme_cpl *response = spdk_nvmf_request_get_response(req);
4603 	uint32_t bdev_nsid;
4604 	int rc;
4605 
4606 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid == 0) {
4607 		bdev_nsid = cmd->nsid;
4608 	} else {
4609 		bdev_nsid = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid;
4610 	}
4611 
4612 	rc = spdk_nvmf_request_get_bdev(bdev_nsid, req, &bdev, &desc, &ch);
4613 	if (rc) {
4614 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4615 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4616 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4617 	}
4618 	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, NULL);
4619 }
4620 
4621 void
4622 spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid)
4623 {
4624 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = nvmf_passthru_admin_cmd;
4625 	g_nvmf_custom_admin_cmd_hdlrs[opc].nsid = forward_nsid;
4626 }
4627 
4628 int
4629 spdk_nvmf_request_get_bdev(uint32_t nsid, struct spdk_nvmf_request *req,
4630 			   struct spdk_bdev **bdev, struct spdk_bdev_desc **desc, struct spdk_io_channel **ch)
4631 {
4632 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
4633 	struct spdk_nvmf_ns *ns;
4634 	struct spdk_nvmf_poll_group *group = req->qpair->group;
4635 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4636 
4637 	*bdev = NULL;
4638 	*desc = NULL;
4639 	*ch = NULL;
4640 
4641 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
4642 	if (ns == NULL || ns->bdev == NULL) {
4643 		return -EINVAL;
4644 	}
4645 
4646 	assert(group != NULL && group->sgroups != NULL);
4647 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
4648 	*bdev = ns->bdev;
4649 	*desc = ns->desc;
4650 	*ch = ns_info->channel;
4651 
4652 	return 0;
4653 }
4654 
4655 struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req)
4656 {
4657 	return req->qpair->ctrlr;
4658 }
4659 
4660 struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req)
4661 {
4662 	return &req->cmd->nvme_cmd;
4663 }
4664 
4665 struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req)
4666 {
4667 	return &req->rsp->nvme_cpl;
4668 }
4669 
4670 struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req)
4671 {
4672 	return req->qpair->ctrlr->subsys;
4673 }
4674 
4675 SPDK_LOG_DEPRECATION_REGISTER(nvmf_request_get_data, "spdk_nvmf_request_get_data",
4676 			      "SPDK 23.09", 60);
4677 
4678 void
4679 spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length)
4680 {
4681 	SPDK_LOG_DEPRECATED(nvmf_request_get_data);
4682 	*data = req->data;
4683 	*length = req->length;
4684 }
4685 
4686 size_t
4687 spdk_nvmf_request_copy_from_buf(struct spdk_nvmf_request *req,
4688 				void *buf, size_t buflen)
4689 {
4690 	struct spdk_iov_xfer ix;
4691 
4692 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
4693 	return spdk_iov_xfer_from_buf(&ix, buf, buflen);
4694 }
4695 
4696 size_t
4697 spdk_nvmf_request_copy_to_buf(struct spdk_nvmf_request *req,
4698 			      void *buf, size_t buflen)
4699 {
4700 	struct spdk_iov_xfer ix;
4701 
4702 	spdk_iov_xfer_init(&ix, req->iov, req->iovcnt);
4703 	return spdk_iov_xfer_to_buf(&ix, buf, buflen);
4704 }
4705 
4706 struct spdk_nvmf_subsystem *spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr)
4707 {
4708 	return ctrlr->subsys;
4709 }
4710 
4711 uint16_t
4712 spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr)
4713 {
4714 	return ctrlr->cntlid;
4715 }
4716 
4717 struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req)
4718 {
4719 	return req->req_to_abort;
4720 }
4721