xref: /spdk/lib/nvmf/ctrlr.c (revision 927f1fd57bd004df581518466ec4c1b8083e5d23)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/bit_array.h"
41 #include "spdk/endian.h"
42 #include "spdk/thread.h"
43 #include "spdk/nvme_spec.h"
44 #include "spdk/nvmf_cmd.h"
45 #include "spdk/string.h"
46 #include "spdk/util.h"
47 #include "spdk/version.h"
48 #include "spdk/log.h"
49 #include "spdk_internal/usdt.h"
50 
/*
 * Keep Alive Timer parameters, in milliseconds.
 * KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS is the granularity to which
 * nvmf_ctrlr_create() rounds the KATO requested by the host.
 */
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS	10000
#define KAS_TIME_UNIT_IN_MS		100
/* Value stored in the KAS field of the controller data by nvmf_ctrlr_cdata_init(). */
#define KAS_DEFAULT_VALUE		(MIN_KEEP_ALIVE_TIMEOUT_IN_MS / KAS_TIME_UNIT_IN_MS)

/* Keep alive timeout given to a discovery controller when the host connects with KATO == 0. */
#define NVMF_DISC_KATO_IN_MS		120000

#define NVMF_CC_RESET_SHN_TIMEOUT_IN_MS	10000

/* Reported in CAP.TO (in 500 ms units) by nvmf_ctrlr_create(). */
#define NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS	(NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + 5000)

/*
 * The firmware revision (FR) reports the SPDK version.
 * FR is only 8 bytes wide, which is too small for SPDK_VERSION_STRING, so only the
 * major, minor and patch strings are concatenated.
 */
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING

#define ANA_TRANSITION_TIME_IN_SEC	10

#define NVMF_ABORT_COMMAND_LIMIT	3
69 
70 /*
71  * Support for custom admin command handlers
72  */
73 struct spdk_nvmf_custom_admin_cmd {
74 	spdk_nvmf_custom_cmd_hdlr hdlr;
75 	uint32_t nsid; /* nsid to forward */
76 };
77 
78 static struct spdk_nvmf_custom_admin_cmd g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_MAX_OPC + 1];
79 
80 static void _nvmf_request_complete(void *ctx);
81 
82 static inline void
83 nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
84 			      uint8_t iattr, uint16_t ipo)
85 {
86 	rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
87 	rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
88 	rsp->status_code_specific.invalid.iattr = iattr;
89 	rsp->status_code_specific.invalid.ipo = ipo;
90 }
91 
92 #define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field)	\
93 	nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field))
94 #define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field)	\
95 	nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))
96 
97 
98 static void
99 nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
100 {
101 	if (!ctrlr) {
102 		SPDK_ERRLOG("Controller is NULL\n");
103 		return;
104 	}
105 
106 	if (ctrlr->keep_alive_poller == NULL) {
107 		return;
108 	}
109 
110 	SPDK_DEBUGLOG(nvmf, "Stop keep alive poller\n");
111 	spdk_poller_unregister(&ctrlr->keep_alive_poller);
112 }
113 
114 static void
115 nvmf_ctrlr_stop_association_timer(struct spdk_nvmf_ctrlr *ctrlr)
116 {
117 	if (!ctrlr) {
118 		SPDK_ERRLOG("Controller is NULL\n");
119 		assert(false);
120 		return;
121 	}
122 
123 	if (ctrlr->association_timer == NULL) {
124 		return;
125 	}
126 
127 	SPDK_DEBUGLOG(nvmf, "Stop association timer\n");
128 	spdk_poller_unregister(&ctrlr->association_timer);
129 }
130 
131 static void
132 nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
133 {
134 	if (status == 0) {
135 		SPDK_DEBUGLOG(nvmf, "ctrlr disconnect qpairs complete successfully\n");
136 	} else {
137 		SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
138 	}
139 }
140 
141 static int
142 _nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i, bool include_admin)
143 {
144 	int rc = 0;
145 	struct spdk_nvmf_ctrlr *ctrlr;
146 	struct spdk_nvmf_qpair *qpair, *temp_qpair;
147 	struct spdk_io_channel *ch;
148 	struct spdk_nvmf_poll_group *group;
149 
150 	ctrlr = spdk_io_channel_iter_get_ctx(i);
151 	ch = spdk_io_channel_iter_get_channel(i);
152 	group = spdk_io_channel_get_ctx(ch);
153 
154 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
155 		if (qpair->ctrlr == ctrlr && (include_admin || !nvmf_qpair_is_admin_queue(qpair))) {
156 			rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
157 			if (rc) {
158 				SPDK_ERRLOG("Qpair disconnect failed\n");
159 				return rc;
160 			}
161 		}
162 	}
163 
164 	return rc;
165 }
166 
/* Per-channel step: disconnect every qpair of the controller, admin queue included. */
static void
nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, true);

	spdk_for_each_channel_continue(i, rc);
}
172 
/* Per-channel step: disconnect only the I/O qpairs of the controller. */
static void
nvmf_ctrlr_disconnect_io_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = _nvmf_ctrlr_disconnect_qpairs_on_pg(i, false);

	spdk_for_each_channel_continue(i, rc);
}
178 
179 static int
180 nvmf_ctrlr_keep_alive_poll(void *ctx)
181 {
182 	uint64_t keep_alive_timeout_tick;
183 	uint64_t now = spdk_get_ticks();
184 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
185 
186 	if (ctrlr->in_destruct) {
187 		nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
188 		return SPDK_POLLER_IDLE;
189 	}
190 
191 	SPDK_DEBUGLOG(nvmf, "Polling ctrlr keep alive timeout\n");
192 
193 	/* If the Keep alive feature is in use and the timer expires */
194 	keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
195 				  ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
196 	if (now > keep_alive_timeout_tick) {
197 		SPDK_NOTICELOG("Disconnecting host %s from subsystem %s due to keep alive timeout.\n",
198 			       ctrlr->hostnqn, ctrlr->subsys->subnqn);
199 		/* set the Controller Fatal Status bit to '1' */
200 		if (ctrlr->vcprop.csts.bits.cfs == 0) {
201 			nvmf_ctrlr_set_fatal_status(ctrlr);
202 
203 			/*
204 			 * disconnect qpairs, terminate Transport connection
205 			 * destroy ctrlr, break the host to controller association
206 			 * disconnect qpairs with qpair->ctrlr == ctrlr
207 			 */
208 			spdk_for_each_channel(ctrlr->subsys->tgt,
209 					      nvmf_ctrlr_disconnect_qpairs_on_pg,
210 					      ctrlr,
211 					      nvmf_ctrlr_disconnect_qpairs_done);
212 			return SPDK_POLLER_BUSY;
213 		}
214 	}
215 
216 	return SPDK_POLLER_IDLE;
217 }
218 
219 static void
220 nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
221 {
222 	if (!ctrlr) {
223 		SPDK_ERRLOG("Controller is NULL\n");
224 		return;
225 	}
226 
227 	/* if cleared to 0 then the Keep Alive Timer is disabled */
228 	if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
229 
230 		ctrlr->last_keep_alive_tick = spdk_get_ticks();
231 
232 		SPDK_DEBUGLOG(nvmf, "Ctrlr add keep alive poller\n");
233 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
234 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
235 	}
236 }
237 
238 static void
239 ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
240 			       struct spdk_nvmf_ctrlr *ctrlr,
241 			       struct spdk_nvmf_fabric_connect_rsp *rsp)
242 {
243 	assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());
244 
245 	/* check if we would exceed ctrlr connection limit */
246 	if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) {
247 		SPDK_ERRLOG("Requested QID %u but Max QID is %u\n",
248 			    qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1);
249 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
250 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
251 		return;
252 	}
253 
254 	if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
255 		SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
256 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
257 		rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
258 		return;
259 	}
260 
261 	qpair->ctrlr = ctrlr;
262 	spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid);
263 
264 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
265 	rsp->status_code_specific.success.cntlid = ctrlr->cntlid;
266 	SPDK_DEBUGLOG(nvmf, "connect capsule response: cntlid = 0x%04x\n",
267 		      rsp->status_code_specific.success.cntlid);
268 
269 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_qpair, qpair, qpair->qid, ctrlr->subsys->subnqn,
270 			   ctrlr->hostnqn);
271 }
272 
273 static void
274 _nvmf_ctrlr_add_admin_qpair(void *ctx)
275 {
276 	struct spdk_nvmf_request *req = ctx;
277 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
278 	struct spdk_nvmf_qpair *qpair = req->qpair;
279 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
280 
281 	ctrlr->admin_qpair = qpair;
282 	ctrlr->association_timeout = qpair->transport->opts.association_timeout;
283 	nvmf_ctrlr_start_keep_alive_timer(ctrlr);
284 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
285 	_nvmf_request_complete(req);
286 }
287 
288 static void
289 _nvmf_subsystem_add_ctrlr(void *ctx)
290 {
291 	struct spdk_nvmf_request *req = ctx;
292 	struct spdk_nvmf_qpair *qpair = req->qpair;
293 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
294 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
295 
296 	if (nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) {
297 		SPDK_ERRLOG("Unable to add controller to subsystem\n");
298 		spdk_bit_array_free(&ctrlr->qpair_mask);
299 		free(ctrlr);
300 		qpair->ctrlr = NULL;
301 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
302 		spdk_nvmf_request_complete(req);
303 		return;
304 	}
305 
306 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_admin_qpair, req);
307 }
308 
309 static void
310 nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
311 		      struct spdk_nvmf_ctrlr_data *cdata)
312 {
313 	cdata->aerl = NVMF_MAX_ASYNC_EVENTS - 1;
314 	cdata->kas = KAS_DEFAULT_VALUE;
315 	cdata->vid = SPDK_PCI_VID_INTEL;
316 	cdata->ssvid = SPDK_PCI_VID_INTEL;
317 	/* INTEL OUI */
318 	cdata->ieee[0] = 0xe4;
319 	cdata->ieee[1] = 0xd2;
320 	cdata->ieee[2] = 0x5c;
321 	cdata->oncs.reservations = 1;
322 	cdata->sgls.supported = 1;
323 	cdata->sgls.keyed_sgl = 1;
324 	cdata->sgls.sgl_offset = 1;
325 	cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
326 	cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16;
327 	cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16;
328 	cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */
329 	cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC;
330 	cdata->nvmf_specific.msdbd = 1;
331 
332 	if (transport->ops->cdata_init) {
333 		transport->ops->cdata_init(transport, subsystem, cdata);
334 	}
335 }
336 
337 static struct spdk_nvmf_ctrlr *
338 nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
339 		  struct spdk_nvmf_request *req,
340 		  struct spdk_nvmf_fabric_connect_cmd *connect_cmd,
341 		  struct spdk_nvmf_fabric_connect_data *connect_data)
342 {
343 	struct spdk_nvmf_ctrlr *ctrlr;
344 	struct spdk_nvmf_transport *transport = req->qpair->transport;
345 	struct spdk_nvme_transport_id listen_trid = {};
346 
347 	ctrlr = calloc(1, sizeof(*ctrlr));
348 	if (ctrlr == NULL) {
349 		SPDK_ERRLOG("Memory allocation failed\n");
350 		return NULL;
351 	}
352 
353 	if (spdk_nvme_trtype_is_fabrics(transport->ops->type)) {
354 		ctrlr->dynamic_ctrlr = true;
355 	} else {
356 		ctrlr->cntlid = connect_data->cntlid;
357 	}
358 
359 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_create, ctrlr, subsystem->subnqn,
360 			   spdk_thread_get_id(req->qpair->group->thread));
361 
362 	STAILQ_INIT(&ctrlr->async_events);
363 	TAILQ_INIT(&ctrlr->log_head);
364 	ctrlr->subsys = subsystem;
365 	ctrlr->thread = req->qpair->group->thread;
366 	ctrlr->disconnect_in_progress = false;
367 
368 	ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr);
369 	if (!ctrlr->qpair_mask) {
370 		SPDK_ERRLOG("Failed to allocate controller qpair mask\n");
371 		goto err_qpair_mask;
372 	}
373 
374 	nvmf_ctrlr_cdata_init(transport, subsystem, &ctrlr->cdata);
375 
376 	/*
377 	 * KAS: This field indicates the granularity of the Keep Alive Timer in 100ms units.
378 	 * If this field is cleared to 0h, then Keep Alive is not supported.
379 	 */
380 	if (ctrlr->cdata.kas) {
381 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
382 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
383 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
384 	}
385 
386 	ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
387 	if (ctrlr->subsys->flags.ana_reporting) {
388 		ctrlr->feat.async_event_configuration.bits.ana_change_notice = 1;
389 	}
390 	ctrlr->feat.volatile_write_cache.bits.wce = 1;
391 	/* Coalescing Disable */
392 	ctrlr->feat.interrupt_vector_configuration.bits.cd = 1;
393 
394 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
395 		/*
396 		 * If keep-alive timeout is not set, discovery controllers use some
397 		 * arbitrary high value in order to cleanup stale discovery sessions
398 		 *
399 		 * From the 1.0a nvme-of spec:
400 		 * "The Keep Alive command is reserved for
401 		 * Discovery controllers. A transport may specify a
402 		 * fixed Discovery controller activity timeout value
403 		 * (e.g., 2 minutes). If no commands are received
404 		 * by a Discovery controller within that time
405 		 * period, the controller may perform the
406 		 * actions for Keep Alive Timer expiration".
407 		 *
408 		 * From the 1.1 nvme-of spec:
409 		 * "A host requests an explicit persistent connection
410 		 * to a Discovery controller and Asynchronous Event Notifications from
411 		 * the Discovery controller on that persistent connection by specifying
412 		 * a non-zero Keep Alive Timer value in the Connect command."
413 		 *
414 		 * In case non-zero KATO is used, we enable discovery_log_change_notice
415 		 * otherwise we disable it and use default discovery controller KATO.
416 		 * KATO is in millisecond.
417 		 */
418 		if (ctrlr->feat.keep_alive_timer.bits.kato == 0) {
419 			ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
420 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 0;
421 		} else {
422 			ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice = 1;
423 		}
424 	}
425 
426 	/* Subtract 1 for admin queue, 1 for 0's based */
427 	ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
428 			1;
429 	ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 -
430 			1;
431 
432 	spdk_uuid_copy(&ctrlr->hostid, (struct spdk_uuid *)connect_data->hostid);
433 	memcpy(ctrlr->hostnqn, connect_data->hostnqn, sizeof(ctrlr->hostnqn));
434 
435 	ctrlr->vcprop.cap.raw = 0;
436 	ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */
437 	ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth -
438 				      1; /* max queue depth */
439 	ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */
440 	/* ready timeout - 500 msec units */
441 	ctrlr->vcprop.cap.bits.to = NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS / 500;
442 	ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */
443 	ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */
444 	ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */
445 	ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */
446 
447 	/* Version Supported: 1.3 */
448 	ctrlr->vcprop.vs.bits.mjr = 1;
449 	ctrlr->vcprop.vs.bits.mnr = 3;
450 	ctrlr->vcprop.vs.bits.ter = 0;
451 
452 	ctrlr->vcprop.cc.raw = 0;
453 	ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */
454 
455 	ctrlr->vcprop.csts.raw = 0;
456 	ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */
457 
458 	SPDK_DEBUGLOG(nvmf, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw);
459 	SPDK_DEBUGLOG(nvmf, "vs 0x%x\n", ctrlr->vcprop.vs.raw);
460 	SPDK_DEBUGLOG(nvmf, "cc 0x%x\n", ctrlr->vcprop.cc.raw);
461 	SPDK_DEBUGLOG(nvmf, "csts 0x%x\n", ctrlr->vcprop.csts.raw);
462 
463 	ctrlr->dif_insert_or_strip = transport->opts.dif_insert_or_strip;
464 
465 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_NVME) {
466 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
467 			SPDK_ERRLOG("Could not get listener transport ID\n");
468 			goto err_listener;
469 		}
470 
471 		ctrlr->listener = nvmf_subsystem_find_listener(ctrlr->subsys, &listen_trid);
472 		if (!ctrlr->listener) {
473 			SPDK_ERRLOG("Listener was not found\n");
474 			goto err_listener;
475 		}
476 	}
477 
478 	req->qpair->ctrlr = ctrlr;
479 	spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_add_ctrlr, req);
480 
481 	return ctrlr;
482 err_listener:
483 	spdk_bit_array_free(&ctrlr->qpair_mask);
484 err_qpair_mask:
485 	free(ctrlr);
486 	return NULL;
487 }
488 
489 static void
490 _nvmf_ctrlr_destruct(void *ctx)
491 {
492 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
493 	struct spdk_nvmf_reservation_log *log, *log_tmp;
494 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
495 
496 	SPDK_DTRACE_PROBE3(nvmf_ctrlr_destruct, ctrlr, ctrlr->subsys->subnqn,
497 			   spdk_thread_get_id(ctrlr->thread));
498 
499 	assert(spdk_get_thread() == ctrlr->thread);
500 	assert(ctrlr->in_destruct);
501 
502 	SPDK_DEBUGLOG(nvmf, "Destroy ctrlr 0x%hx\n", ctrlr->cntlid);
503 	if (ctrlr->disconnect_in_progress) {
504 		SPDK_ERRLOG("freeing ctrlr with disconnect in progress\n");
505 		spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
506 		return;
507 	}
508 
509 	nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
510 	nvmf_ctrlr_stop_association_timer(ctrlr);
511 	spdk_bit_array_free(&ctrlr->qpair_mask);
512 
513 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
514 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
515 		free(log);
516 	}
517 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
518 		STAILQ_REMOVE(&ctrlr->async_events, event, spdk_nvmf_async_event_completion, link);
519 		free(event);
520 	}
521 	free(ctrlr);
522 }
523 
524 void
525 nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
526 {
527 	nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);
528 
529 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
530 }
531 
532 static void
533 nvmf_ctrlr_add_io_qpair(void *ctx)
534 {
535 	struct spdk_nvmf_request *req = ctx;
536 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
537 	struct spdk_nvmf_qpair *qpair = req->qpair;
538 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
539 	struct spdk_nvmf_qpair *admin_qpair = ctrlr->admin_qpair;
540 
541 	SPDK_DTRACE_PROBE4(nvmf_ctrlr_add_io_qpair, ctrlr, req->qpair, req->qpair->qid,
542 			   spdk_thread_get_id(ctrlr->thread));
543 
544 	/* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect.
545 	  * For error case, the value should be NULL. So set it to NULL at first.
546 	  */
547 	qpair->ctrlr = NULL;
548 
549 	/* Make sure the controller is not being destroyed. */
550 	if (ctrlr->in_destruct) {
551 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
552 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
553 		goto end;
554 	}
555 
556 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
557 		SPDK_ERRLOG("I/O connect not allowed on discovery controller\n");
558 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
559 		goto end;
560 	}
561 
562 	if (!ctrlr->vcprop.cc.bits.en) {
563 		SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n");
564 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
565 		goto end;
566 	}
567 
568 	if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) {
569 		SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n",
570 			    ctrlr->vcprop.cc.bits.iosqes);
571 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
572 		goto end;
573 	}
574 
575 	if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) {
576 		SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n",
577 			    ctrlr->vcprop.cc.bits.iocqes);
578 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
579 		goto end;
580 	}
581 
582 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
583 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
584 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
585 		 * state to DEACTIVATING and removing it from poll group */
586 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
587 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
588 		goto end;
589 	}
590 
591 	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
592 end:
593 	spdk_nvmf_request_complete(req);
594 }
595 
596 static void
597 _nvmf_ctrlr_add_io_qpair(void *ctx)
598 {
599 	struct spdk_nvmf_request *req = ctx;
600 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
601 	struct spdk_nvmf_fabric_connect_data *data = req->data;
602 	struct spdk_nvmf_ctrlr *ctrlr;
603 	struct spdk_nvmf_qpair *qpair = req->qpair;
604 	struct spdk_nvmf_qpair *admin_qpair;
605 	struct spdk_nvmf_tgt *tgt = qpair->transport->tgt;
606 	struct spdk_nvmf_subsystem *subsystem;
607 	struct spdk_nvme_transport_id listen_trid = {};
608 	const struct spdk_nvmf_subsystem_listener *listener;
609 
610 	SPDK_DEBUGLOG(nvmf, "Connect I/O Queue for controller id 0x%x\n", data->cntlid);
611 
612 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
613 	/* We already checked this in spdk_nvmf_ctrlr_connect */
614 	assert(subsystem != NULL);
615 
616 	ctrlr = nvmf_subsystem_get_ctrlr(subsystem, data->cntlid);
617 	if (ctrlr == NULL) {
618 		SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid);
619 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
620 		spdk_nvmf_request_complete(req);
621 		return;
622 	}
623 
624 	/* fail before passing a message to the controller thread. */
625 	if (ctrlr->in_destruct) {
626 		SPDK_ERRLOG("Got I/O connect while ctrlr was being destroyed.\n");
627 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
628 		spdk_nvmf_request_complete(req);
629 		return;
630 	}
631 
632 	/* If ANA reporting is enabled, check if I/O connect is on the same listener. */
633 	if (subsystem->flags.ana_reporting) {
634 		if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &listen_trid) != 0) {
635 			SPDK_ERRLOG("Could not get listener transport ID\n");
636 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
637 			spdk_nvmf_request_complete(req);
638 			return;
639 		}
640 
641 		listener = nvmf_subsystem_find_listener(subsystem, &listen_trid);
642 		if (listener != ctrlr->listener) {
643 			SPDK_ERRLOG("I/O connect is on a listener different from admin connect\n");
644 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
645 			spdk_nvmf_request_complete(req);
646 			return;
647 		}
648 	}
649 
650 	admin_qpair = ctrlr->admin_qpair;
651 	if (admin_qpair->state != SPDK_NVMF_QPAIR_ACTIVE || admin_qpair->group == NULL) {
652 		/* There is a chance that admin qpair is being destroyed at this moment due to e.g.
653 		 * expired keep alive timer. Part of the qpair destruction process is change of qpair's
654 		 * state to DEACTIVATING and removing it from poll group */
655 		SPDK_ERRLOG("Inactive admin qpair (state %d, group %p)\n", admin_qpair->state, admin_qpair->group);
656 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid);
657 		spdk_nvmf_request_complete(req);
658 		return;
659 	}
660 	qpair->ctrlr = ctrlr;
661 	spdk_thread_send_msg(admin_qpair->group->thread, nvmf_ctrlr_add_io_qpair, req);
662 }
663 
664 static bool
665 nvmf_qpair_access_allowed(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_subsystem *subsystem,
666 			  const char *hostnqn)
667 {
668 	struct spdk_nvme_transport_id listen_trid = {};
669 
670 	if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
671 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subsystem->subnqn, hostnqn);
672 		return false;
673 	}
674 
675 	if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) {
676 		SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n",
677 			    subsystem->subnqn);
678 		return false;
679 	}
680 
681 	if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) {
682 		SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n",
683 			    subsystem->subnqn, hostnqn);
684 		return false;
685 	}
686 
687 	return true;
688 }
689 
690 static int
691 _nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
692 {
693 	struct spdk_nvmf_fabric_connect_data *data = req->data;
694 	struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd;
695 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
696 	struct spdk_nvmf_qpair *qpair = req->qpair;
697 	struct spdk_nvmf_transport *transport = qpair->transport;
698 	struct spdk_nvmf_ctrlr *ctrlr;
699 	struct spdk_nvmf_subsystem *subsystem;
700 
701 	SPDK_DEBUGLOG(nvmf, "recfmt 0x%x qid %u sqsize %u\n",
702 		      cmd->recfmt, cmd->qid, cmd->sqsize);
703 
704 	SPDK_DEBUGLOG(nvmf, "Connect data:\n");
705 	SPDK_DEBUGLOG(nvmf, "  cntlid:  0x%04x\n", data->cntlid);
706 	SPDK_DEBUGLOG(nvmf, "  hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n",
707 		      ntohl(*(uint32_t *)&data->hostid[0]),
708 		      ntohs(*(uint16_t *)&data->hostid[4]),
709 		      ntohs(*(uint16_t *)&data->hostid[6]),
710 		      data->hostid[8],
711 		      data->hostid[9],
712 		      ntohs(*(uint16_t *)&data->hostid[10]),
713 		      ntohl(*(uint32_t *)&data->hostid[12]));
714 	SPDK_DEBUGLOG(nvmf, "  subnqn: \"%s\"\n", data->subnqn);
715 	SPDK_DEBUGLOG(nvmf, "  hostnqn: \"%s\"\n", data->hostnqn);
716 
717 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
718 	if (!subsystem) {
719 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
720 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
721 	}
722 
723 	if (cmd->recfmt != 0) {
724 		SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt);
725 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
726 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT;
727 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
728 	}
729 
730 	/*
731 	 * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and
732 	 * strictly less than max_aq_depth (admin queues) or max_queue_depth (io queues).
733 	 */
734 	if (cmd->sqsize == 0) {
735 		SPDK_ERRLOG("Invalid SQSIZE = 0\n");
736 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
737 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
738 	}
739 
740 	if (cmd->qid == 0) {
741 		if (cmd->sqsize >= transport->opts.max_aq_depth) {
742 			SPDK_ERRLOG("Invalid SQSIZE for admin queue %u (min 1, max %u)\n",
743 				    cmd->sqsize, transport->opts.max_aq_depth - 1);
744 			SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
745 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
746 		}
747 	} else if (cmd->sqsize >= transport->opts.max_queue_depth) {
748 		SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n",
749 			    cmd->sqsize, transport->opts.max_queue_depth - 1);
750 		SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize);
751 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
752 	}
753 
754 	qpair->sq_head_max = cmd->sqsize;
755 	qpair->qid = cmd->qid;
756 
757 	if (0 == qpair->qid) {
758 		qpair->group->stat.admin_qpairs++;
759 		qpair->group->stat.current_admin_qpairs++;
760 	} else {
761 		qpair->group->stat.io_qpairs++;
762 		qpair->group->stat.current_io_qpairs++;
763 	}
764 
765 	if (cmd->qid == 0) {
766 		SPDK_DEBUGLOG(nvmf, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid);
767 
768 		if (spdk_nvme_trtype_is_fabrics(transport->ops->type) && data->cntlid != 0xFFFF) {
769 			/* This NVMf target only supports dynamic mode. */
770 			SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid);
771 			SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid);
772 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
773 		}
774 
775 		/* Establish a new ctrlr */
776 		ctrlr = nvmf_ctrlr_create(subsystem, req, cmd, data);
777 		if (!ctrlr) {
778 			SPDK_ERRLOG("nvmf_ctrlr_create() failed\n");
779 			rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
780 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
781 		} else {
782 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
783 		}
784 	} else {
785 		spdk_thread_send_msg(subsystem->thread, _nvmf_ctrlr_add_io_qpair, req);
786 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
787 	}
788 }
789 
790 static inline bool
791 nvmf_request_is_fabric_connect(struct spdk_nvmf_request *req)
792 {
793 	return req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
794 	       req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT;
795 }
796 
797 static struct spdk_nvmf_subsystem_poll_group *
798 nvmf_subsystem_pg_from_connect_cmd(struct spdk_nvmf_request *req)
799 {
800 	struct spdk_nvmf_fabric_connect_data *data;
801 	struct spdk_nvmf_subsystem *subsystem;
802 	struct spdk_nvmf_tgt *tgt;
803 
804 	assert(nvmf_request_is_fabric_connect(req));
805 	assert(req->qpair->ctrlr == NULL);
806 
807 	data = req->data;
808 	tgt = req->qpair->transport->tgt;
809 
810 	subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn);
811 	if (subsystem == NULL) {
812 		return NULL;
813 	}
814 
815 	return &req->qpair->group->sgroups[subsystem->id];
816 }
817 
818 int
819 spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
820 {
821 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
822 	struct spdk_nvmf_subsystem_poll_group *sgroup;
823 	struct spdk_nvmf_qpair *qpair = req->qpair;
824 	enum spdk_nvmf_request_exec_status status;
825 
826 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
827 	if (!sgroup) {
828 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
829 		status = SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
830 		goto out;
831 	}
832 
833 	sgroup->mgmt_io_outstanding++;
834 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
835 
836 	status = _nvmf_ctrlr_connect(req);
837 
838 out:
839 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
840 		_nvmf_request_complete(req);
841 	}
842 
843 	return status;
844 }
845 
846 static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req);
847 
848 static int
849 retry_connect(void *arg)
850 {
851 	struct spdk_nvmf_request *req = arg;
852 	struct spdk_nvmf_subsystem_poll_group *sgroup;
853 	int rc;
854 
855 	sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
856 	assert(sgroup != NULL);
857 	sgroup->mgmt_io_outstanding++;
858 	spdk_poller_unregister(&req->poller);
859 	rc = nvmf_ctrlr_cmd_connect(req);
860 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
861 		_nvmf_request_complete(req);
862 	}
863 	return SPDK_POLLER_BUSY;
864 }
865 
866 static int
867 nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
868 {
869 	struct spdk_nvmf_fabric_connect_data *data = req->data;
870 	struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp;
871 	struct spdk_nvmf_transport *transport = req->qpair->transport;
872 	struct spdk_nvmf_subsystem *subsystem;
873 
874 	if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) {
875 		SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length);
876 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
877 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
878 	}
879 
880 	subsystem = spdk_nvmf_tgt_find_subsystem(transport->tgt, data->subnqn);
881 	if (!subsystem) {
882 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn);
883 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
884 	}
885 
886 	if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
887 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
888 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
889 	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
890 		struct spdk_nvmf_subsystem_poll_group *sgroup;
891 
892 		if (req->timeout_tsc == 0) {
893 			/* We will only retry the request up to 1 second. */
894 			req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz();
895 		} else if (spdk_get_ticks() > req->timeout_tsc) {
896 			SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn);
897 			rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
898 			rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
899 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
900 		}
901 
902 		/* Subsystem is not ready to handle a connect. Use a poller to retry it
903 		 * again later. Decrement the mgmt_io_outstanding to avoid the
904 		 * subsystem waiting for this command to complete before unpausing.
905 		 */
906 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
907 		assert(sgroup != NULL);
908 		sgroup->mgmt_io_outstanding--;
909 		SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn);
910 		req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100);
911 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
912 	}
913 
914 	/* Ensure that hostnqn is null terminated */
915 	if (!memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
916 		SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n");
917 		SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn);
918 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
919 	}
920 
921 	if (!nvmf_qpair_access_allowed(req->qpair, subsystem, data->hostnqn)) {
922 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
923 		rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST;
924 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
925 	}
926 
927 	return _nvmf_ctrlr_connect(req);
928 }
929 
930 static int
931 nvmf_ctrlr_association_remove(void *ctx)
932 {
933 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
934 	int rc;
935 
936 	nvmf_ctrlr_stop_association_timer(ctrlr);
937 
938 	if (ctrlr->in_destruct) {
939 		return SPDK_POLLER_IDLE;
940 	}
941 	SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n",
942 		      ctrlr->subsys->subnqn);
943 
944 	if (ctrlr->admin_qpair) {
945 		rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
946 		if (rc < 0) {
947 			SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
948 			assert(false);
949 		}
950 	}
951 
952 	return SPDK_POLLER_BUSY;
953 }
954 
955 static int
956 _nvmf_ctrlr_cc_reset_shn_done(void *ctx)
957 {
958 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
959 	uint64_t now = spdk_get_ticks();
960 	uint32_t count;
961 
962 	if (ctrlr->cc_timer) {
963 		spdk_poller_unregister(&ctrlr->cc_timer);
964 	}
965 
966 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
967 	SPDK_DEBUGLOG(nvmf, "ctrlr %p active queue count %u\n", ctrlr, count);
968 
969 	if (count > 1) {
970 		if (now < ctrlr->cc_timeout_tsc) {
971 			/* restart cc timer */
972 			ctrlr->cc_timer = SPDK_POLLER_REGISTER(_nvmf_ctrlr_cc_reset_shn_done, ctrlr, 100 * 1000);
973 			return SPDK_POLLER_IDLE;
974 		} else {
975 			/* controller fatal status */
976 			SPDK_WARNLOG("IO timeout, ctrlr %p is in fatal status\n", ctrlr);
977 			nvmf_ctrlr_set_fatal_status(ctrlr);
978 		}
979 	}
980 
981 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
982 
983 	if (ctrlr->disconnect_is_shn) {
984 		ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE;
985 		ctrlr->disconnect_is_shn = false;
986 	} else {
987 		/* Only a subset of the registers are cleared out on a reset */
988 		ctrlr->vcprop.cc.raw = 0;
989 		ctrlr->vcprop.csts.raw = 0;
990 	}
991 
992 	/* After CC.EN transitions to 0 (due to shutdown or reset), the association
993 	 * between the host and controller shall be preserved for at least 2 minutes */
994 	if (ctrlr->association_timer) {
995 		SPDK_DEBUGLOG(nvmf, "Association timer already set\n");
996 		nvmf_ctrlr_stop_association_timer(ctrlr);
997 	}
998 	if (ctrlr->association_timeout) {
999 		ctrlr->association_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_association_remove, ctrlr,
1000 					   ctrlr->association_timeout * 1000);
1001 	}
1002 	ctrlr->disconnect_in_progress = false;
1003 	return SPDK_POLLER_BUSY;
1004 }
1005 
/*
 * Completion callback of the spdk_for_each_channel() pass that disconnects
 * the controller's I/O qpairs on every poll group.  A negative status is
 * logged (and asserts in debug builds); the reset/shutdown is then finished
 * by _nvmf_ctrlr_cc_reset_shn_done().
 */
static void
nvmf_ctrlr_cc_reset_shn_done(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	ctrlr = spdk_io_channel_iter_get_ctx(i);

	if (status < 0) {
		SPDK_ERRLOG("Fail to disconnect io ctrlr qpairs\n");
		assert(false);
	}

	_nvmf_ctrlr_cc_reset_shn_done(ctrlr);
}
1018 
/*
 * Completion callback for a bdev reset: log the outcome and release the
 * bdev I/O.  cb_arg is not used.
 */
static void
nvmf_bdev_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	const char *outcome = success ? "success" : "failure";

	SPDK_NOTICELOG("Resetting bdev done with %s\n", outcome);

	spdk_bdev_free_io(bdev_io);
}
1026 
1027 
1028 static int
1029 nvmf_ctrlr_cc_timeout(void *ctx)
1030 {
1031 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
1032 	struct spdk_nvmf_poll_group *group = ctrlr->admin_qpair->group;
1033 	struct spdk_nvmf_ns *ns;
1034 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1035 
1036 	assert(group != NULL && group->sgroups != NULL);
1037 	spdk_poller_unregister(&ctrlr->cc_timeout_timer);
1038 	SPDK_DEBUGLOG(nvmf, "Ctrlr %p reset or shutdown timeout\n", ctrlr);
1039 
1040 	for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
1041 	     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1042 		if (ns->bdev == NULL) {
1043 			continue;
1044 		}
1045 		ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[ns->opts.nsid - 1];
1046 		SPDK_NOTICELOG("Ctrlr %p resetting NSID %u\n", ctrlr, ns->opts.nsid);
1047 		spdk_bdev_reset(ns->desc, ns_info->channel, nvmf_bdev_complete_reset, NULL);
1048 	}
1049 
1050 	return SPDK_POLLER_BUSY;
1051 }
1052 
1053 const struct spdk_nvmf_registers *
1054 spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr)
1055 {
1056 	return &ctrlr->vcprop;
1057 }
1058 
1059 void
1060 nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr)
1061 {
1062 	ctrlr->vcprop.csts.bits.cfs = 1;
1063 }
1064 
1065 static uint64_t
1066 nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr)
1067 {
1068 	return ctrlr->vcprop.cap.raw;
1069 }
1070 
1071 static uint64_t
1072 nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr)
1073 {
1074 	return ctrlr->vcprop.vs.raw;
1075 }
1076 
1077 static uint64_t
1078 nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr)
1079 {
1080 	return ctrlr->vcprop.cc.raw;
1081 }
1082 
1083 static bool
1084 nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1085 {
1086 	union spdk_nvme_cc_register cc, diff;
1087 	uint32_t cc_timeout_ms;
1088 
1089 	cc.raw = value;
1090 
1091 	SPDK_DEBUGLOG(nvmf, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw);
1092 	SPDK_DEBUGLOG(nvmf, "new CC: 0x%08x\n", cc.raw);
1093 
1094 	/*
1095 	 * Calculate which bits changed between the current and new CC.
1096 	 * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed.
1097 	 */
1098 	diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw;
1099 
1100 	if (diff.bits.en) {
1101 		if (cc.bits.en) {
1102 			SPDK_DEBUGLOG(nvmf, "Property Set CC Enable!\n");
1103 			nvmf_ctrlr_stop_association_timer(ctrlr);
1104 
1105 			ctrlr->vcprop.cc.bits.en = 1;
1106 			ctrlr->vcprop.csts.bits.rdy = 1;
1107 		} else {
1108 			SPDK_DEBUGLOG(nvmf, "Property Set CC Disable!\n");
1109 			if (ctrlr->disconnect_in_progress) {
1110 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1111 				return true;
1112 			}
1113 
1114 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1115 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1116 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1117 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1118 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1119 
1120 			ctrlr->vcprop.cc.bits.en = 0;
1121 			ctrlr->disconnect_in_progress = true;
1122 			ctrlr->disconnect_is_shn = false;
1123 			spdk_for_each_channel(ctrlr->subsys->tgt,
1124 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1125 					      ctrlr,
1126 					      nvmf_ctrlr_cc_reset_shn_done);
1127 		}
1128 		diff.bits.en = 0;
1129 	}
1130 
1131 	if (diff.bits.shn) {
1132 		if (cc.bits.shn == SPDK_NVME_SHN_NORMAL ||
1133 		    cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
1134 			SPDK_DEBUGLOG(nvmf, "Property Set CC Shutdown %u%ub!\n",
1135 				      cc.bits.shn >> 1, cc.bits.shn & 1);
1136 			if (ctrlr->disconnect_in_progress) {
1137 				SPDK_DEBUGLOG(nvmf, "Disconnect in progress\n");
1138 				return true;
1139 			}
1140 
1141 			ctrlr->cc_timeout_timer = SPDK_POLLER_REGISTER(nvmf_ctrlr_cc_timeout, ctrlr,
1142 						  NVMF_CC_RESET_SHN_TIMEOUT_IN_MS * 1000);
1143 			/* Make sure cc_timeout_ms is between cc_timeout_timer and Host reset/shutdown timeout */
1144 			cc_timeout_ms = (NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS) / 2;
1145 			ctrlr->cc_timeout_tsc = spdk_get_ticks() + cc_timeout_ms * spdk_get_ticks_hz() / (uint64_t)1000;
1146 
1147 			ctrlr->vcprop.cc.bits.shn = cc.bits.shn;
1148 			ctrlr->disconnect_in_progress = true;
1149 			ctrlr->disconnect_is_shn = true;
1150 			spdk_for_each_channel(ctrlr->subsys->tgt,
1151 					      nvmf_ctrlr_disconnect_io_qpairs_on_pg,
1152 					      ctrlr,
1153 					      nvmf_ctrlr_cc_reset_shn_done);
1154 
1155 			/* From the time a shutdown is initiated the controller shall disable
1156 			 * Keep Alive timer */
1157 			nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
1158 		} else if (cc.bits.shn == 0) {
1159 			ctrlr->vcprop.cc.bits.shn = 0;
1160 		} else {
1161 			SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n",
1162 				    cc.bits.shn >> 1, cc.bits.shn & 1);
1163 			return false;
1164 		}
1165 		diff.bits.shn = 0;
1166 	}
1167 
1168 	if (diff.bits.iosqes) {
1169 		SPDK_DEBUGLOG(nvmf, "Prop Set IOSQES = %u (%u bytes)\n",
1170 			      cc.bits.iosqes, 1u << cc.bits.iosqes);
1171 		ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes;
1172 		diff.bits.iosqes = 0;
1173 	}
1174 
1175 	if (diff.bits.iocqes) {
1176 		SPDK_DEBUGLOG(nvmf, "Prop Set IOCQES = %u (%u bytes)\n",
1177 			      cc.bits.iocqes, 1u << cc.bits.iocqes);
1178 		ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes;
1179 		diff.bits.iocqes = 0;
1180 	}
1181 
1182 	if (diff.bits.ams) {
1183 		SPDK_ERRLOG("Arbitration Mechanism Selected (AMS) 0x%x not supported!\n", cc.bits.ams);
1184 		return false;
1185 	}
1186 
1187 	if (diff.bits.mps) {
1188 		SPDK_ERRLOG("Memory Page Size (MPS) %u KiB not supported!\n", (1 << (2 + cc.bits.mps)));
1189 		return false;
1190 	}
1191 
1192 	if (diff.bits.css) {
1193 		SPDK_ERRLOG("I/O Command Set Selected (CSS) 0x%x not supported!\n", cc.bits.css);
1194 		return false;
1195 	}
1196 
1197 	if (diff.raw != 0) {
1198 		/* Print an error message, but don't fail the command in this case.
1199 		 * If we did want to fail in this case, we'd need to ensure we acted
1200 		 * on no other bits or the initiator gets confused. */
1201 		SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw);
1202 	}
1203 
1204 	return true;
1205 }
1206 
1207 static uint64_t
1208 nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr)
1209 {
1210 	return ctrlr->vcprop.csts.raw;
1211 }
1212 
1213 static uint64_t
1214 nvmf_prop_get_aqa(struct spdk_nvmf_ctrlr *ctrlr)
1215 {
1216 	return ctrlr->vcprop.aqa.raw;
1217 }
1218 
1219 static bool
1220 nvmf_prop_set_aqa(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1221 {
1222 	union spdk_nvme_aqa_register aqa;
1223 
1224 	aqa.raw = value;
1225 
1226 	/*
1227 	 * We don't need to explicitly check for maximum size, as the fields are
1228 	 * limited to 12 bits (4096).
1229 	 */
1230 	if (aqa.bits.asqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1231 	    aqa.bits.acqs < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES - 1 ||
1232 	    aqa.bits.reserved1 != 0 || aqa.bits.reserved2 != 0) {
1233 		return false;
1234 	}
1235 
1236 	ctrlr->vcprop.aqa.raw = value;
1237 
1238 	return true;
1239 }
1240 
1241 static uint64_t
1242 nvmf_prop_get_asq(struct spdk_nvmf_ctrlr *ctrlr)
1243 {
1244 	return ctrlr->vcprop.asq;
1245 }
1246 
1247 static bool
1248 nvmf_prop_set_asq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1249 {
1250 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & (0xFFFFFFFFULL << 32ULL)) | value;
1251 
1252 	return true;
1253 }
1254 
1255 static bool
1256 nvmf_prop_set_asq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1257 {
1258 	ctrlr->vcprop.asq = (ctrlr->vcprop.asq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1259 
1260 	return true;
1261 }
1262 
1263 static uint64_t
1264 nvmf_prop_get_acq(struct spdk_nvmf_ctrlr *ctrlr)
1265 {
1266 	return ctrlr->vcprop.acq;
1267 }
1268 
1269 static bool
1270 nvmf_prop_set_acq_lower(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1271 {
1272 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & (0xFFFFFFFFULL << 32ULL)) | value;
1273 
1274 	return true;
1275 }
1276 
1277 static bool
1278 nvmf_prop_set_acq_upper(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value)
1279 {
1280 	ctrlr->vcprop.acq = (ctrlr->vcprop.acq & 0xFFFFFFFFULL) | ((uint64_t)value << 32ULL);
1281 
1282 	return true;
1283 }
1284 
1285 struct nvmf_prop {
1286 	uint32_t ofst;
1287 	uint8_t size;
1288 	char name[11];
1289 	uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr);
1290 	bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
1291 	bool (*set_upper_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint32_t value);
1292 };
1293 
1294 #define PROP(field, size, get_cb, set_cb, set_upper_cb) \
1295 	{ \
1296 		offsetof(struct spdk_nvme_registers, field), \
1297 		size, \
1298 		#field, \
1299 		get_cb, set_cb, set_upper_cb \
1300 	}
1301 
1302 static const struct nvmf_prop nvmf_props[] = {
1303 	PROP(cap,  8, nvmf_prop_get_cap,  NULL,                    NULL),
1304 	PROP(vs,   4, nvmf_prop_get_vs,   NULL,                    NULL),
1305 	PROP(cc,   4, nvmf_prop_get_cc,   nvmf_prop_set_cc,        NULL),
1306 	PROP(csts, 4, nvmf_prop_get_csts, NULL,                    NULL),
1307 	PROP(aqa,  4, nvmf_prop_get_aqa,  nvmf_prop_set_aqa,       NULL),
1308 	PROP(asq,  8, nvmf_prop_get_asq,  nvmf_prop_set_asq_lower, nvmf_prop_set_asq_upper),
1309 	PROP(acq,  8, nvmf_prop_get_acq,  nvmf_prop_set_acq_lower, nvmf_prop_set_acq_upper),
1310 };
1311 
1312 static const struct nvmf_prop *
1313 find_prop(uint32_t ofst, uint8_t size)
1314 {
1315 	size_t i;
1316 
1317 	for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) {
1318 		const struct nvmf_prop *prop = &nvmf_props[i];
1319 
1320 		if ((ofst >= prop->ofst) && (ofst + size <= prop->ofst + prop->size)) {
1321 			return prop;
1322 		}
1323 	}
1324 
1325 	return NULL;
1326 }
1327 
1328 static int
1329 nvmf_property_get(struct spdk_nvmf_request *req)
1330 {
1331 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1332 	struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd;
1333 	struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp;
1334 	const struct nvmf_prop *prop;
1335 	uint8_t size;
1336 
1337 	response->status.sc = 0;
1338 	response->value.u64 = 0;
1339 
1340 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x\n",
1341 		      cmd->attrib.size, cmd->ofst);
1342 
1343 	switch (cmd->attrib.size) {
1344 	case SPDK_NVMF_PROP_SIZE_4:
1345 		size = 4;
1346 		break;
1347 	case SPDK_NVMF_PROP_SIZE_8:
1348 		size = 8;
1349 		break;
1350 	default:
1351 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1352 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1353 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1354 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1355 	}
1356 
1357 	prop = find_prop(cmd->ofst, size);
1358 	if (prop == NULL || prop->get_cb == NULL) {
1359 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1360 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1361 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1362 	}
1363 
1364 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1365 
1366 	response->value.u64 = prop->get_cb(ctrlr);
1367 
1368 	if (size != prop->size) {
1369 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to read. */
1370 		assert(size == 4);
1371 		assert(prop->size == 8);
1372 
1373 		if (cmd->ofst == prop->ofst) {
1374 			/* Keep bottom 4 bytes only */
1375 			response->value.u64 &= 0xFFFFFFFF;
1376 		} else {
1377 			/* Keep top 4 bytes only */
1378 			response->value.u64 >>= 32;
1379 		}
1380 	}
1381 
1382 	SPDK_DEBUGLOG(nvmf, "response value: 0x%" PRIx64 "\n", response->value.u64);
1383 
1384 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1385 }
1386 
1387 static int
1388 nvmf_property_set(struct spdk_nvmf_request *req)
1389 {
1390 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1391 	struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
1392 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1393 	const struct nvmf_prop *prop;
1394 	uint64_t value;
1395 	uint8_t size;
1396 	bool ret;
1397 
1398 	SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
1399 		      cmd->attrib.size, cmd->ofst, cmd->value.u64);
1400 
1401 	switch (cmd->attrib.size) {
1402 	case SPDK_NVMF_PROP_SIZE_4:
1403 		size = 4;
1404 		break;
1405 	case SPDK_NVMF_PROP_SIZE_8:
1406 		size = 8;
1407 		break;
1408 	default:
1409 		SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
1410 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1411 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1412 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1413 	}
1414 
1415 	prop = find_prop(cmd->ofst, size);
1416 	if (prop == NULL || prop->set_cb == NULL) {
1417 		SPDK_INFOLOG(nvmf, "Invalid offset 0x%x\n", cmd->ofst);
1418 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1419 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1420 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1421 	}
1422 
1423 	SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
1424 
1425 	value = cmd->value.u64;
1426 
1427 	if (prop->size == 4) {
1428 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1429 	} else if (size != prop->size) {
1430 		/* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. */
1431 		assert(size == 4);
1432 		assert(prop->size == 8);
1433 
1434 		if (cmd->ofst == prop->ofst) {
1435 			ret = prop->set_cb(ctrlr, (uint32_t)value);
1436 		} else {
1437 			ret = prop->set_upper_cb(ctrlr, (uint32_t)value);
1438 		}
1439 	} else {
1440 		ret = prop->set_cb(ctrlr, (uint32_t)value);
1441 		if (ret) {
1442 			ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32));
1443 		}
1444 	}
1445 
1446 	if (!ret) {
1447 		SPDK_ERRLOG("prop set_cb failed\n");
1448 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1449 		response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
1450 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1451 	}
1452 
1453 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1454 }
1455 
1456 static int
1457 nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req)
1458 {
1459 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1460 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1461 
1462 	SPDK_DEBUGLOG(nvmf, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11);
1463 
1464 	ctrlr->feat.arbitration.raw = cmd->cdw11;
1465 	ctrlr->feat.arbitration.bits.reserved = 0;
1466 
1467 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1468 }
1469 
1470 static int
1471 nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req)
1472 {
1473 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1474 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1475 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1476 
1477 	SPDK_DEBUGLOG(nvmf, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11);
1478 
1479 	/* Only PS = 0 is allowed, since we report NPSS = 0 */
1480 	if (cmd->cdw11_bits.feat_power_management.bits.ps != 0) {
1481 		SPDK_ERRLOG("Invalid power state %u\n", cmd->cdw11_bits.feat_power_management.bits.ps);
1482 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1483 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1484 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1485 	}
1486 
1487 	ctrlr->feat.power_management.raw = cmd->cdw11;
1488 	ctrlr->feat.power_management.bits.reserved = 0;
1489 
1490 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1491 }
1492 
1493 static bool
1494 temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts)
1495 {
1496 	/*
1497 	 * Valid TMPSEL values:
1498 	 *  0000b - 1000b: temperature sensors
1499 	 *  1111b: set all implemented temperature sensors
1500 	 */
1501 	if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) {
1502 		/* 1001b - 1110b: reserved */
1503 		SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel);
1504 		return false;
1505 	}
1506 
1507 	/*
1508 	 * Valid THSEL values:
1509 	 *  00b: over temperature threshold
1510 	 *  01b: under temperature threshold
1511 	 */
1512 	if (opts->bits.thsel > 1) {
1513 		/* 10b - 11b: reserved */
1514 		SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel);
1515 		return false;
1516 	}
1517 
1518 	return true;
1519 }
1520 
1521 static int
1522 nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req)
1523 {
1524 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1525 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1526 
1527 	SPDK_DEBUGLOG(nvmf, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1528 
1529 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1530 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1531 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1532 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1533 	}
1534 
1535 	/* TODO: no sensors implemented - ignore new values */
1536 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1537 }
1538 
1539 static int
1540 nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req)
1541 {
1542 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1543 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1544 
1545 	SPDK_DEBUGLOG(nvmf, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11);
1546 
1547 	if (!temp_threshold_opts_valid(&cmd->cdw11_bits.feat_temp_threshold)) {
1548 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1549 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1550 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1551 	}
1552 
1553 	/* TODO: no sensors implemented - return 0 for all thresholds */
1554 	rsp->cdw0 = 0;
1555 
1556 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1557 }
1558 
1559 static int
1560 nvmf_ctrlr_get_features_interrupt_vector_configuration(struct spdk_nvmf_request *req)
1561 {
1562 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1563 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1564 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1565 	union spdk_nvme_feat_interrupt_vector_configuration iv_conf = {};
1566 
1567 	SPDK_DEBUGLOG(nvmf, "Get Features - Interrupt Vector Configuration (cdw11 = 0x%0x)\n", cmd->cdw11);
1568 
1569 	iv_conf.bits.iv = cmd->cdw11_bits.feat_interrupt_vector_configuration.bits.iv;
1570 	iv_conf.bits.cd = ctrlr->feat.interrupt_vector_configuration.bits.cd;
1571 	rsp->cdw0 = iv_conf.raw;
1572 
1573 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1574 }
1575 
1576 static int
1577 nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req)
1578 {
1579 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1580 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1581 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1582 
1583 	SPDK_DEBUGLOG(nvmf, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", cmd->cdw11);
1584 
1585 	if (cmd->cdw11_bits.feat_error_recovery.bits.dulbe) {
1586 		/*
1587 		 * Host is not allowed to set this bit, since we don't advertise it in
1588 		 * Identify Namespace.
1589 		 */
1590 		SPDK_ERRLOG("Host set unsupported DULBE bit\n");
1591 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
1592 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1593 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1594 	}
1595 
1596 	ctrlr->feat.error_recovery.raw = cmd->cdw11;
1597 	ctrlr->feat.error_recovery.bits.reserved = 0;
1598 
1599 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1600 }
1601 
1602 static int
1603 nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req)
1604 {
1605 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1606 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1607 
1608 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11);
1609 
1610 	ctrlr->feat.volatile_write_cache.raw = cmd->cdw11;
1611 	ctrlr->feat.volatile_write_cache.bits.reserved = 0;
1612 
1613 	SPDK_DEBUGLOG(nvmf, "Set Features - Volatile Write Cache %s\n",
1614 		      ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled");
1615 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1616 }
1617 
1618 static int
1619 nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req)
1620 {
1621 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1622 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1623 
1624 	SPDK_DEBUGLOG(nvmf, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11);
1625 
1626 	ctrlr->feat.write_atomicity.raw = cmd->cdw11;
1627 	ctrlr->feat.write_atomicity.bits.reserved = 0;
1628 
1629 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1630 }
1631 
1632 static int
1633 nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req)
1634 {
1635 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1636 
1637 	SPDK_ERRLOG("Set Features - Host Identifier not allowed\n");
1638 	response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1639 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1640 }
1641 
1642 static int
1643 nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req)
1644 {
1645 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1646 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1647 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1648 
1649 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Identifier\n");
1650 
1651 	if (!cmd->cdw11_bits.feat_host_identifier.bits.exhid) {
1652 		/* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */
1653 		SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n");
1654 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1655 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1656 	}
1657 
1658 	if (req->data == NULL || req->length < sizeof(ctrlr->hostid)) {
1659 		SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n");
1660 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1661 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1662 	}
1663 
1664 	spdk_uuid_copy((struct spdk_uuid *)req->data, &ctrlr->hostid);
1665 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1666 }
1667 
1668 static int
1669 nvmf_ctrlr_get_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1670 {
1671 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1672 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1673 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1674 	struct spdk_nvmf_ns *ns;
1675 
1676 	SPDK_DEBUGLOG(nvmf, "get Features - Reservation Notification Mask\n");
1677 
1678 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1679 		SPDK_ERRLOG("get Features - Invalid Namespace ID\n");
1680 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1681 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1682 	}
1683 
1684 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1685 	if (ns == NULL) {
1686 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1687 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1688 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1689 	}
1690 	rsp->cdw0 = ns->mask;
1691 
1692 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1693 }
1694 
1695 static int
1696 nvmf_ctrlr_set_features_reservation_notification_mask(struct spdk_nvmf_request *req)
1697 {
1698 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1699 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
1700 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1701 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1702 	struct spdk_nvmf_ns *ns;
1703 
1704 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Notification Mask\n");
1705 
1706 	if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1707 		for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
1708 		     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
1709 			ns->mask = cmd->cdw11;
1710 		}
1711 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1712 	}
1713 
1714 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1715 	if (ns == NULL) {
1716 		SPDK_ERRLOG("Set Features - Invalid Namespace ID\n");
1717 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1718 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1719 	}
1720 	ns->mask = cmd->cdw11;
1721 
1722 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1723 }
1724 
1725 static int
1726 nvmf_ctrlr_get_features_reservation_persistence(struct spdk_nvmf_request *req)
1727 {
1728 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1729 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1730 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1731 	struct spdk_nvmf_ns *ns;
1732 
1733 	SPDK_DEBUGLOG(nvmf, "Get Features - Reservation Persistence\n");
1734 
1735 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1736 	/* NSID with SPDK_NVME_GLOBAL_NS_TAG (=0xffffffff) also included */
1737 	if (ns == NULL) {
1738 		SPDK_ERRLOG("Get Features - Invalid Namespace ID\n");
1739 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1740 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1741 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1742 	}
1743 
1744 	response->cdw0 = ns->ptpl_activated;
1745 
1746 	response->status.sct = SPDK_NVME_SCT_GENERIC;
1747 	response->status.sc = SPDK_NVME_SC_SUCCESS;
1748 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1749 }
1750 
1751 static int
1752 nvmf_ctrlr_set_features_reservation_persistence(struct spdk_nvmf_request *req)
1753 {
1754 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1755 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1756 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1757 	struct spdk_nvmf_ns *ns;
1758 	bool ptpl;
1759 
1760 	SPDK_DEBUGLOG(nvmf, "Set Features - Reservation Persistence\n");
1761 
1762 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
1763 	ptpl = cmd->cdw11_bits.feat_rsv_persistence.bits.ptpl;
1764 
1765 	if (cmd->nsid != SPDK_NVME_GLOBAL_NS_TAG && ns && ns->ptpl_file) {
1766 		ns->ptpl_activated = ptpl;
1767 	} else if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1768 		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns && ns->ptpl_file;
1769 		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
1770 			ns->ptpl_activated = ptpl;
1771 		}
1772 	} else {
1773 		SPDK_ERRLOG("Set Features - Invalid Namespace ID or Reservation Configuration\n");
1774 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1775 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1776 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1777 	}
1778 
1779 	/* TODO: Feature not changeable for now */
1780 	response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1781 	response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
1782 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1783 }
1784 
1785 static int
1786 nvmf_ctrlr_get_features_host_behavior_support(struct spdk_nvmf_request *req)
1787 {
1788 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1789 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1790 	struct spdk_nvme_host_behavior host_behavior = {};
1791 
1792 	SPDK_DEBUGLOG(nvmf, "Get Features - Host Behavior Support\n");
1793 
1794 	if (req->data == NULL || req->length < sizeof(struct spdk_nvme_host_behavior)) {
1795 		SPDK_ERRLOG("invalid data buffer for Host Behavior Support\n");
1796 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1797 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1798 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1799 	}
1800 
1801 	host_behavior.acre = ctrlr->acre_enabled;
1802 	memcpy(req->data, &host_behavior, sizeof(host_behavior));
1803 
1804 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1805 }
1806 
1807 static int
1808 nvmf_ctrlr_set_features_host_behavior_support(struct spdk_nvmf_request *req)
1809 {
1810 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1811 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
1812 	struct spdk_nvme_host_behavior *host_behavior;
1813 
1814 	SPDK_DEBUGLOG(nvmf, "Set Features - Host Behavior Support\n");
1815 	if (req->iovcnt != 1) {
1816 		SPDK_ERRLOG("Host Behavior Support invalid iovcnt: %d\n", req->iovcnt);
1817 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1818 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1819 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1820 	}
1821 	if (req->iov[0].iov_len != sizeof(struct spdk_nvme_host_behavior)) {
1822 		SPDK_ERRLOG("Host Behavior Support invalid iov_len: %zd\n", req->iov[0].iov_len);
1823 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1824 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1825 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1826 	}
1827 
1828 	host_behavior = (struct spdk_nvme_host_behavior *)req->iov[0].iov_base;
1829 	if (host_behavior->acre == 0) {
1830 		ctrlr->acre_enabled = false;
1831 	} else if (host_behavior->acre == 1) {
1832 		ctrlr->acre_enabled = true;
1833 	} else {
1834 		SPDK_ERRLOG("Host Behavior Support invalid acre: 0x%02x\n", host_behavior->acre);
1835 		response->status.sct = SPDK_NVME_SCT_GENERIC;
1836 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1837 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1838 	}
1839 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1840 }
1841 
1842 static int
1843 nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
1844 {
1845 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1846 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1847 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1848 
1849 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);
1850 
1851 	/*
1852 	 * if attempts to disable keep alive by setting kato to 0h
1853 	 * a status value of keep alive invalid shall be returned
1854 	 */
1855 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato == 0) {
1856 		rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
1857 	} else if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
1858 		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
1859 	} else {
1860 		/* round up to milliseconds */
1861 		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(
1862 					cmd->cdw11_bits.feat_keep_alive_timer.bits.kato,
1863 					KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
1864 				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
1865 	}
1866 
1867 	/*
1868 	 * if change the keep alive timeout value successfully
1869 	 * update the keep alive poller.
1870 	 */
1871 	if (cmd->cdw11_bits.feat_keep_alive_timer.bits.kato != 0) {
1872 		if (ctrlr->keep_alive_poller != NULL) {
1873 			spdk_poller_unregister(&ctrlr->keep_alive_poller);
1874 		}
1875 		ctrlr->keep_alive_poller = SPDK_POLLER_REGISTER(nvmf_ctrlr_keep_alive_poll, ctrlr,
1876 					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
1877 	}
1878 
1879 	SPDK_DEBUGLOG(nvmf, "Set Features - Keep Alive Timer set to %u ms\n",
1880 		      ctrlr->feat.keep_alive_timer.bits.kato);
1881 
1882 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1883 }
1884 
1885 static int
1886 nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
1887 {
1888 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1889 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1890 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
1891 	uint32_t count;
1892 
1893 	SPDK_DEBUGLOG(nvmf, "Set Features - Number of Queues, cdw11 0x%x\n",
1894 		      req->cmd->nvme_cmd.cdw11);
1895 
1896 	if (cmd->cdw11_bits.feat_num_of_queues.bits.ncqr == UINT16_MAX ||
1897 	    cmd->cdw11_bits.feat_num_of_queues.bits.nsqr == UINT16_MAX) {
1898 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
1899 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1900 	}
1901 
1902 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
1903 	/* verify that the controller is ready to process commands */
1904 	if (count > 1) {
1905 		SPDK_DEBUGLOG(nvmf, "Queue pairs already active!\n");
1906 		rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
1907 	} else {
1908 		/*
1909 		 * Ignore the value requested by the host -
1910 		 * always return the pre-configured value based on max_qpairs_allowed.
1911 		 */
1912 		rsp->cdw0 = ctrlr->feat.number_of_queues.raw;
1913 	}
1914 
1915 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1916 }
1917 
1918 int
1919 nvmf_ctrlr_save_aers(struct spdk_nvmf_ctrlr *ctrlr, uint16_t *aer_cids,
1920 		     uint16_t max_aers)
1921 {
1922 	struct spdk_nvmf_request *req;
1923 	uint16_t i;
1924 
1925 	if (!aer_cids || max_aers < ctrlr->nr_aer_reqs) {
1926 		return -EINVAL;
1927 	}
1928 
1929 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
1930 		req = ctrlr->aer_req[i];
1931 		aer_cids[i] = req->cmd->nvme_cmd.cid;
1932 	}
1933 
1934 	return ctrlr->nr_aer_reqs;
1935 }
1936 
1937 int
1938 nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data)
1939 {
1940 	uint32_t num_async_events = 0;
1941 	struct spdk_nvmf_async_event_completion *event, *event_tmp;
1942 
1943 	memcpy(&data->feat, &ctrlr->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
1944 	data->cntlid = ctrlr->cntlid;
1945 	data->acre_enabled = ctrlr->acre_enabled;
1946 	data->notice_aen_mask = ctrlr->notice_aen_mask;
1947 
1948 	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
1949 		data->async_events[num_async_events++].raw = event->event.raw;
1950 		if (num_async_events == NVMF_MIGR_MAX_PENDING_AERS) {
1951 			SPDK_ERRLOG("%p has too many pending AERs\n", ctrlr);
1952 			break;
1953 		}
1954 	}
1955 	data->num_async_events = num_async_events;
1956 
1957 	return 0;
1958 }
1959 
1960 int
1961 nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data)
1962 {
1963 	struct spdk_nvmf_async_event_completion *event;
1964 	uint32_t i;
1965 
1966 	memcpy(&ctrlr->feat, &data->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
1967 	ctrlr->acre_enabled = data->acre_enabled;
1968 	ctrlr->notice_aen_mask = data->notice_aen_mask;
1969 
1970 	for (i = 0; i < data->num_async_events; i++) {
1971 		event = calloc(1, sizeof(struct spdk_nvmf_async_event_completion));
1972 		if (!event) {
1973 			return -ENOMEM;
1974 		}
1975 		event->event.raw = data->async_events[i].raw;
1976 		STAILQ_INSERT_TAIL(&ctrlr->async_events, event, link);
1977 	}
1978 
1979 	return 0;
1980 }
1981 
1982 static int
1983 nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req)
1984 {
1985 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
1986 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
1987 
1988 	SPDK_DEBUGLOG(nvmf, "Set Features - Async Event Configuration, cdw11 0x%08x\n",
1989 		      cmd->cdw11);
1990 	ctrlr->feat.async_event_configuration.raw = cmd->cdw11;
1991 	ctrlr->feat.async_event_configuration.bits.reserved1 = 0;
1992 	ctrlr->feat.async_event_configuration.bits.reserved2 = 0;
1993 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
1994 }
1995 
1996 static int
1997 nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
1998 {
1999 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2000 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
2001 	struct spdk_nvmf_subsystem_poll_group *sgroup;
2002 	struct spdk_nvmf_async_event_completion *pending_event;
2003 
2004 	SPDK_DEBUGLOG(nvmf, "Async Event Request\n");
2005 
2006 	/* AER cmd is an exception */
2007 	sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
2008 	assert(sgroup != NULL);
2009 	sgroup->mgmt_io_outstanding--;
2010 
2011 	/* Four asynchronous events are supported for now */
2012 	if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) {
2013 		SPDK_DEBUGLOG(nvmf, "AERL exceeded\n");
2014 		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
2015 		rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
2016 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2017 	}
2018 
2019 	if (!STAILQ_EMPTY(&ctrlr->async_events)) {
2020 		pending_event = STAILQ_FIRST(&ctrlr->async_events);
2021 		rsp->cdw0 = pending_event->event.raw;
2022 		STAILQ_REMOVE(&ctrlr->async_events, pending_event, spdk_nvmf_async_event_completion, link);
2023 		free(pending_event);
2024 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2025 	}
2026 
2027 	ctrlr->aer_req[ctrlr->nr_aer_reqs++] = req;
2028 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
2029 }
2030 
/* Cursor used to copy a sequence of buffers into an iovec array. */
struct copy_iovs_ctx {
	/* Destination iovec array. */
	struct iovec *iovs;
	/* Number of entries in iovs. */
	int iovcnt;
	/* Index of the iovec currently being filled. */
	int cur_iov_idx;
	/* Number of bytes already written into the current iovec. */
	size_t cur_iov_offset;
};
2037 
/* Zero-fill the memory described by each of the first iovcnt entries of iovs. */
static void
_clear_iovs(struct iovec *iovs, int iovcnt)
{
	int i;

	for (i = 0; i < iovcnt; i++) {
		memset(iovs[i].iov_base, 0, iovs[i].iov_len);
	}
}
2050 
2051 static void
2052 _init_copy_iovs_ctx(struct copy_iovs_ctx *copy_ctx, struct iovec *iovs, int iovcnt)
2053 {
2054 	copy_ctx->iovs = iovs;
2055 	copy_ctx->iovcnt = iovcnt;
2056 	copy_ctx->cur_iov_idx = 0;
2057 	copy_ctx->cur_iov_offset = 0;
2058 }
2059 
2060 static size_t
2061 _copy_buf_to_iovs(struct copy_iovs_ctx *copy_ctx, const void *buf, size_t buf_len)
2062 {
2063 	size_t len, iov_remain_len, copied_len = 0;
2064 	struct iovec *iov;
2065 
2066 	if (buf_len == 0) {
2067 		return 0;
2068 	}
2069 
2070 	while (copy_ctx->cur_iov_idx < copy_ctx->iovcnt) {
2071 		iov = &copy_ctx->iovs[copy_ctx->cur_iov_idx];
2072 		iov_remain_len = iov->iov_len - copy_ctx->cur_iov_offset;
2073 		if (iov_remain_len == 0) {
2074 			copy_ctx->cur_iov_idx++;
2075 			copy_ctx->cur_iov_offset = 0;
2076 			continue;
2077 		}
2078 
2079 		len = spdk_min(iov_remain_len, buf_len - copied_len);
2080 		memcpy((char *)iov->iov_base + copy_ctx->cur_iov_offset,
2081 		       (const char *)buf + copied_len,
2082 		       len);
2083 		copied_len += len;
2084 		copy_ctx->cur_iov_offset += len;
2085 
2086 		if (buf_len == copied_len) {
2087 			return copied_len;
2088 		}
2089 	}
2090 
2091 	return copied_len;
2092 }
2093 
2094 static void
2095 nvmf_get_firmware_slot_log_page(struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length)
2096 {
2097 	struct spdk_nvme_firmware_page fw_page;
2098 	size_t copy_len;
2099 	struct copy_iovs_ctx copy_ctx;
2100 
2101 	_init_copy_iovs_ctx(&copy_ctx, iovs, iovcnt);
2102 
2103 	memset(&fw_page, 0, sizeof(fw_page));
2104 	fw_page.afi.active_slot = 1;
2105 	fw_page.afi.next_reset_slot = 0;
2106 	spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' ');
2107 
2108 	if (offset < sizeof(fw_page)) {
2109 		copy_len = spdk_min(sizeof(fw_page) - offset, length);
2110 		if (copy_len > 0) {
2111 			_copy_buf_to_iovs(&copy_ctx, (const char *)&fw_page + offset, copy_len);
2112 		}
2113 	}
2114 }
2115 
/*
 * Bit positions within the controller's notice_aen_mask. Each value names
 * the bit used to mask one kind of asynchronous event notification.
 */
enum spdk_nvme_async_event_mask_bit {
	/* Namespace Attribute Change notification */
	SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT		= 0,
	/* Asymmetric Namespace Access Change notification */
	SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT		= 1,
	/* Discovery Log Change notification */
	SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT	= 2,
	/* Reservation Log Page Available notification */
	SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT	= 3,
	/* Error event */
	SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT			= 4,
	/* 5 - 63 Reserved */
};
2132 
2133 static inline void
2134 nvmf_ctrlr_unmask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2135 		      enum spdk_nvme_async_event_mask_bit mask)
2136 {
2137 	ctrlr->notice_aen_mask &= ~(1 << mask);
2138 }
2139 
2140 static inline bool
2141 nvmf_ctrlr_mask_aen(struct spdk_nvmf_ctrlr *ctrlr,
2142 		    enum spdk_nvme_async_event_mask_bit mask)
2143 {
2144 	if (ctrlr->notice_aen_mask & (1 << mask)) {
2145 		return false;
2146 	} else {
2147 		ctrlr->notice_aen_mask |= (1 << mask);
2148 		return true;
2149 	}
2150 }
2151 
2152 /* we have to use the typedef in the function declaration to appease astyle. */
2153 typedef enum spdk_nvme_ana_state spdk_nvme_ana_state_t;
2154 
2155 static inline spdk_nvme_ana_state_t
2156 nvmf_ctrlr_get_ana_state(struct spdk_nvmf_ctrlr *ctrlr, uint32_t anagrpid)
2157 {
2158 	if (!ctrlr->subsys->flags.ana_reporting) {
2159 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2160 	}
2161 
2162 	if (spdk_unlikely(ctrlr->listener == NULL)) {
2163 		return SPDK_NVME_ANA_INACCESSIBLE_STATE;
2164 	}
2165 
2166 	assert(anagrpid - 1 < ctrlr->subsys->max_nsid);
2167 	return ctrlr->listener->ana_state[anagrpid - 1];
2168 }
2169 
2170 static spdk_nvme_ana_state_t
2171 nvmf_ctrlr_get_ana_state_from_nsid(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2172 {
2173 	struct spdk_nvmf_ns *ns;
2174 
2175 	/* We do not have NVM subsystem specific ANA state. Hence if NSID is either
2176 	 * SPDK_NVMF_GLOBAL_NS_TAG, invalid, or for inactive namespace, return
2177 	 * the optimized state.
2178 	 */
2179 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
2180 	if (ns == NULL) {
2181 		return SPDK_NVME_ANA_OPTIMIZED_STATE;
2182 	}
2183 
2184 	return nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2185 }
2186 
2187 static void
2188 nvmf_get_error_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2189 			uint64_t offset, uint32_t length, uint32_t rae)
2190 {
2191 	if (!rae) {
2192 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT);
2193 	}
2194 
2195 	/* TODO: actually fill out log page data */
2196 }
2197 
2198 static void
2199 nvmf_get_ana_log_page(struct spdk_nvmf_ctrlr *ctrlr, struct iovec *iovs, int iovcnt,
2200 		      uint64_t offset, uint32_t length, uint32_t rae)
2201 {
2202 	struct spdk_nvme_ana_page ana_hdr;
2203 	struct spdk_nvme_ana_group_descriptor ana_desc;
2204 	size_t copy_len, copied_len;
2205 	uint32_t num_anagrp = 0, anagrpid;
2206 	struct spdk_nvmf_ns *ns;
2207 	struct copy_iovs_ctx copy_ctx;
2208 
2209 	_init_copy_iovs_ctx(&copy_ctx, iovs, iovcnt);
2210 
2211 	if (length == 0) {
2212 		goto done;
2213 	}
2214 
2215 	if (offset >= sizeof(ana_hdr)) {
2216 		offset -= sizeof(ana_hdr);
2217 	} else {
2218 		for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
2219 			if (ctrlr->subsys->ana_group[anagrpid - 1] > 0) {
2220 				num_anagrp++;
2221 			}
2222 		}
2223 
2224 		memset(&ana_hdr, 0, sizeof(ana_hdr));
2225 
2226 		ana_hdr.num_ana_group_desc = num_anagrp;
2227 		/* TODO: Support Change Count. */
2228 		ana_hdr.change_count = 0;
2229 
2230 		copy_len = spdk_min(sizeof(ana_hdr) - offset, length);
2231 		copied_len = _copy_buf_to_iovs(&copy_ctx, (const char *)&ana_hdr + offset, copy_len);
2232 		assert(copied_len == copy_len);
2233 		length -= copied_len;
2234 		offset = 0;
2235 	}
2236 
2237 	if (length == 0) {
2238 		goto done;
2239 	}
2240 
2241 	for (anagrpid = 1; anagrpid <= ctrlr->subsys->max_nsid; anagrpid++) {
2242 		if (ctrlr->subsys->ana_group[anagrpid - 1] == 0) {
2243 			continue;
2244 		}
2245 
2246 		if (offset >= sizeof(ana_desc)) {
2247 			offset -= sizeof(ana_desc);
2248 		} else {
2249 			memset(&ana_desc, 0, sizeof(ana_desc));
2250 
2251 			ana_desc.ana_group_id = anagrpid;
2252 			ana_desc.num_of_nsid = ctrlr->subsys->ana_group[anagrpid - 1];
2253 			ana_desc.ana_state = nvmf_ctrlr_get_ana_state(ctrlr, anagrpid);
2254 
2255 			copy_len = spdk_min(sizeof(ana_desc) - offset, length);
2256 			copied_len = _copy_buf_to_iovs(&copy_ctx, (const char *)&ana_desc + offset,
2257 						       copy_len);
2258 			assert(copied_len == copy_len);
2259 			length -= copied_len;
2260 			offset = 0;
2261 
2262 			if (length == 0) {
2263 				goto done;
2264 			}
2265 		}
2266 
2267 		/* TODO: Revisit here about O(n^2) cost if we have subsystem with
2268 		 * many namespaces in the future.
2269 		 */
2270 		for (ns = spdk_nvmf_subsystem_get_first_ns(ctrlr->subsys); ns != NULL;
2271 		     ns = spdk_nvmf_subsystem_get_next_ns(ctrlr->subsys, ns)) {
2272 			if (ns->anagrpid != anagrpid) {
2273 				continue;
2274 			}
2275 
2276 			if (offset >= sizeof(uint32_t)) {
2277 				offset -= sizeof(uint32_t);
2278 				continue;
2279 			}
2280 
2281 			copy_len = spdk_min(sizeof(uint32_t) - offset, length);
2282 			copied_len = _copy_buf_to_iovs(&copy_ctx, (const char *)&ns->nsid + offset,
2283 						       copy_len);
2284 			assert(copied_len == copy_len);
2285 			length -= copied_len;
2286 			offset = 0;
2287 
2288 			if (length == 0) {
2289 				goto done;
2290 			}
2291 		}
2292 	}
2293 
2294 done:
2295 	if (!rae) {
2296 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT);
2297 	}
2298 }
2299 
2300 void
2301 nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid)
2302 {
2303 	uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list);
2304 	uint16_t i;
2305 	bool found = false;
2306 
2307 	for (i = 0; i < ctrlr->changed_ns_list_count; i++) {
2308 		if (ctrlr->changed_ns_list.ns_list[i] == nsid) {
2309 			/* nsid is already in the list */
2310 			found = true;
2311 			break;
2312 		}
2313 	}
2314 
2315 	if (!found) {
2316 		if (ctrlr->changed_ns_list_count == max_changes) {
2317 			/* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. */
2318 			ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu;
2319 			for (i = 1; i < max_changes; i++) {
2320 				ctrlr->changed_ns_list.ns_list[i] = 0;
2321 			}
2322 		} else {
2323 			ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid;
2324 		}
2325 	}
2326 }
2327 
2328 static void
2329 nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2330 				  struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2331 {
2332 	size_t copy_length;
2333 	struct copy_iovs_ctx copy_ctx;
2334 
2335 	_init_copy_iovs_ctx(&copy_ctx, iovs, iovcnt);
2336 
2337 	if (offset < sizeof(ctrlr->changed_ns_list)) {
2338 		copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset);
2339 		if (copy_length) {
2340 			_copy_buf_to_iovs(&copy_ctx, (char *)&ctrlr->changed_ns_list + offset, copy_length);
2341 		}
2342 	}
2343 
2344 	/* Clear log page each time it is read */
2345 	ctrlr->changed_ns_list_count = 0;
2346 	memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list));
2347 
2348 	if (!rae) {
2349 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT);
2350 	}
2351 }
2352 
2353 /* The structure can be modified if we provide support for other commands in future */
2354 static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = {
2355 	.admin_cmds_supported = {
2356 		/* CSUPP, LBCC, NCC, NIC, CCC, CSE */
2357 		/* Get Log Page */
2358 		[SPDK_NVME_OPC_GET_LOG_PAGE]		= {1, 0, 0, 0, 0, 0, 0, 0},
2359 		/* Identify */
2360 		[SPDK_NVME_OPC_IDENTIFY]		= {1, 0, 0, 0, 0, 0, 0, 0},
2361 		/* Abort */
2362 		[SPDK_NVME_OPC_ABORT]			= {1, 0, 0, 0, 0, 0, 0, 0},
2363 		/* Set Features */
2364 		[SPDK_NVME_OPC_SET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
2365 		/* Get Features */
2366 		[SPDK_NVME_OPC_GET_FEATURES]		= {1, 0, 0, 0, 0, 0, 0, 0},
2367 		/* Async Event Request */
2368 		[SPDK_NVME_OPC_ASYNC_EVENT_REQUEST]	= {1, 0, 0, 0, 0, 0, 0, 0},
2369 		/* Keep Alive */
2370 		[SPDK_NVME_OPC_KEEP_ALIVE]		= {1, 0, 0, 0, 0, 0, 0, 0},
2371 	},
2372 	.io_cmds_supported = {
2373 		/* FLUSH */
2374 		[SPDK_NVME_OPC_FLUSH]			= {1, 1, 0, 0, 0, 0, 0, 0},
2375 		/* WRITE */
2376 		[SPDK_NVME_OPC_WRITE]			= {1, 1, 0, 0, 0, 0, 0, 0},
2377 		/* READ */
2378 		[SPDK_NVME_OPC_READ]			= {1, 0, 0, 0, 0, 0, 0, 0},
2379 		/* WRITE ZEROES */
2380 		[SPDK_NVME_OPC_WRITE_ZEROES]		= {1, 1, 0, 0, 0, 0, 0, 0},
2381 		/* DATASET MANAGEMENT */
2382 		[SPDK_NVME_OPC_DATASET_MANAGEMENT]	= {1, 1, 0, 0, 0, 0, 0, 0},
2383 		/* COMPARE */
2384 		[SPDK_NVME_OPC_COMPARE]			= {1, 0, 0, 0, 0, 0, 0, 0},
2385 	},
2386 };
2387 
2388 static void
2389 nvmf_get_cmds_and_effects_log_page(struct iovec *iovs, int iovcnt,
2390 				   uint64_t offset, uint32_t length)
2391 {
2392 	uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page);
2393 	size_t copy_len = 0;
2394 	struct copy_iovs_ctx copy_ctx;
2395 
2396 	_init_copy_iovs_ctx(&copy_ctx, iovs, iovcnt);
2397 
2398 	if (offset < page_size) {
2399 		copy_len = spdk_min(page_size - offset, length);
2400 		_copy_buf_to_iovs(&copy_ctx, (char *)(&g_cmds_and_effect_log_page) + offset, copy_len);
2401 	}
2402 }
2403 
2404 static void
2405 nvmf_get_reservation_notification_log_page(struct spdk_nvmf_ctrlr *ctrlr,
2406 		struct iovec *iovs, int iovcnt, uint64_t offset, uint32_t length, uint32_t rae)
2407 {
2408 	uint32_t unit_log_len, avail_log_len, next_pos, copy_len;
2409 	struct spdk_nvmf_reservation_log *log, *log_tmp;
2410 	struct copy_iovs_ctx copy_ctx;
2411 
2412 	_init_copy_iovs_ctx(&copy_ctx, iovs, iovcnt);
2413 
2414 	unit_log_len = sizeof(struct spdk_nvme_reservation_notification_log);
2415 	/* No available log, return zeroed log pages */
2416 	if (!ctrlr->num_avail_log_pages) {
2417 		return;
2418 	}
2419 
2420 	avail_log_len = ctrlr->num_avail_log_pages * unit_log_len;
2421 	if (offset >= avail_log_len) {
2422 		return;
2423 	}
2424 
2425 	next_pos = 0;
2426 	TAILQ_FOREACH_SAFE(log, &ctrlr->log_head, link, log_tmp) {
2427 		TAILQ_REMOVE(&ctrlr->log_head, log, link);
2428 		ctrlr->num_avail_log_pages--;
2429 
2430 		next_pos += unit_log_len;
2431 		if (next_pos > offset) {
2432 			copy_len = spdk_min(next_pos - offset, length);
2433 			_copy_buf_to_iovs(&copy_ctx, &log->log, copy_len);
2434 			length -= copy_len;
2435 			offset += copy_len;
2436 		}
2437 		free(log);
2438 
2439 		if (length == 0) {
2440 			break;
2441 		}
2442 	}
2443 
2444 	if (!rae) {
2445 		nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT);
2446 	}
2447 	return;
2448 }
2449 
2450 static int
2451 nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req)
2452 {
2453 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2454 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2455 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2456 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
2457 	struct spdk_nvme_transport_id cmd_source_trid;
2458 	uint64_t offset, len;
2459 	uint32_t rae, numdl, numdu;
2460 	uint8_t lid;
2461 
2462 	if (req->data == NULL) {
2463 		SPDK_DEBUGLOG(nvmf, "get log command with no buffer\n");
2464 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2465 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2466 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2467 	}
2468 
2469 	offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32);
2470 	if (offset & 3) {
2471 		SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset);
2472 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2473 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2474 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2475 	}
2476 
2477 	rae = cmd->cdw10_bits.get_log_page.rae;
2478 	numdl = cmd->cdw10_bits.get_log_page.numdl;
2479 	numdu = cmd->cdw11_bits.get_log_page.numdu;
2480 	len = ((numdu << 16) + numdl + (uint64_t)1) * 4;
2481 	if (len > req->length) {
2482 		SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n",
2483 			    len, req->length);
2484 		response->status.sct = SPDK_NVME_SCT_GENERIC;
2485 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2486 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2487 	}
2488 
2489 	lid = cmd->cdw10_bits.get_log_page.lid;
2490 	SPDK_DEBUGLOG(nvmf, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 " rae=%u\n",
2491 		      lid, offset, len, rae);
2492 
2493 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2494 		switch (lid) {
2495 		case SPDK_NVME_LOG_DISCOVERY:
2496 			if (spdk_nvmf_qpair_get_listen_trid(req->qpair, &cmd_source_trid)) {
2497 				SPDK_ERRLOG("Failed to get LOG_DISCOVERY source trid\n");
2498 				response->status.sct = SPDK_NVME_SCT_GENERIC;
2499 				response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2500 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2501 			}
2502 			nvmf_get_discovery_log_page(subsystem->tgt, ctrlr->hostnqn, req->iov, req->iovcnt,
2503 						    offset, len, &cmd_source_trid);
2504 			if (!rae) {
2505 				nvmf_ctrlr_unmask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT);
2506 			}
2507 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2508 		default:
2509 			goto invalid_log_page;
2510 		}
2511 	} else {
2512 		if (offset > len) {
2513 			SPDK_ERRLOG("Get log page: offset (%" PRIu64 ") > len (%" PRIu64 ")\n",
2514 				    offset, len);
2515 			response->status.sct = SPDK_NVME_SCT_GENERIC;
2516 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2517 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2518 		}
2519 
2520 		switch (lid) {
2521 		case SPDK_NVME_LOG_ERROR:
2522 			nvmf_get_error_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2523 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2524 		case SPDK_NVME_LOG_HEALTH_INFORMATION:
2525 			/* TODO: actually fill out log page data */
2526 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2527 		case SPDK_NVME_LOG_FIRMWARE_SLOT:
2528 			nvmf_get_firmware_slot_log_page(req->iov, req->iovcnt, offset, len);
2529 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2530 		case SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS:
2531 			if (subsystem->flags.ana_reporting) {
2532 				nvmf_get_ana_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2533 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2534 			} else {
2535 				goto invalid_log_page;
2536 			}
2537 		case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG:
2538 			nvmf_get_cmds_and_effects_log_page(req->iov, req->iovcnt, offset, len);
2539 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2540 		case SPDK_NVME_LOG_CHANGED_NS_LIST:
2541 			nvmf_get_changed_ns_list_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2542 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2543 		case SPDK_NVME_LOG_RESERVATION_NOTIFICATION:
2544 			nvmf_get_reservation_notification_log_page(ctrlr, req->iov, req->iovcnt, offset, len, rae);
2545 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2546 		default:
2547 			goto invalid_log_page;
2548 		}
2549 	}
2550 
2551 invalid_log_page:
2552 	SPDK_INFOLOG(nvmf, "Unsupported Get Log Page 0x%02X\n", lid);
2553 	response->status.sct = SPDK_NVME_SCT_GENERIC;
2554 	response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2555 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2556 }
2557 
2558 int
2559 spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
2560 			    struct spdk_nvme_cmd *cmd,
2561 			    struct spdk_nvme_cpl *rsp,
2562 			    struct spdk_nvme_ns_data *nsdata)
2563 {
2564 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2565 	struct spdk_nvmf_ns *ns;
2566 	uint32_t max_num_blocks;
2567 	enum spdk_nvme_ana_state ana_state;
2568 
2569 	if (cmd->nsid == 0 || cmd->nsid > subsystem->max_nsid) {
2570 		SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", cmd->nsid);
2571 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2572 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2573 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2574 	}
2575 
2576 	ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
2577 	if (ns == NULL || ns->bdev == NULL) {
2578 		/*
2579 		 * Inactive namespaces should return a zero filled data structure.
2580 		 * The data buffer is already zeroed by nvmf_ctrlr_process_admin_cmd(),
2581 		 * so we can just return early here.
2582 		 */
2583 		SPDK_DEBUGLOG(nvmf, "Identify Namespace for inactive NSID %u\n", cmd->nsid);
2584 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2585 		rsp->status.sc = SPDK_NVME_SC_SUCCESS;
2586 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2587 	}
2588 
2589 	nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
2590 
2591 	assert(ctrlr->admin_qpair);
2592 	/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
2593 	max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
2594 			 (1U << nsdata->lbaf[nsdata->flbas.format].lbads);
2595 	if (nsdata->noiob > max_num_blocks) {
2596 		nsdata->noiob = max_num_blocks;
2597 	}
2598 
2599 	/* Set NOWS equal to Controller MDTS */
2600 	if (nsdata->nsfeat.optperf) {
2601 		nsdata->nows = max_num_blocks - 1;
2602 	}
2603 
2604 	if (subsystem->flags.ana_reporting) {
2605 		assert(ns->anagrpid - 1 < subsystem->max_nsid);
2606 		nsdata->anagrpid = ns->anagrpid;
2607 
2608 		ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
2609 		if (ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE ||
2610 		    ana_state == SPDK_NVME_ANA_PERSISTENT_LOSS_STATE) {
2611 			nsdata->nuse = 0;
2612 		}
2613 	}
2614 
2615 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2616 }
2617 
2618 static void
2619 nvmf_ctrlr_populate_oacs(struct spdk_nvmf_ctrlr *ctrlr,
2620 			 struct spdk_nvme_ctrlr_data *cdata)
2621 {
2622 	cdata->oacs = ctrlr->cdata.oacs;
2623 
2624 	cdata->oacs.virtualization_management =
2625 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT].hdlr != NULL;
2626 	cdata->oacs.nvme_mi = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_SEND].hdlr != NULL
2627 			      && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NVME_MI_RECEIVE].hdlr != NULL;
2628 	cdata->oacs.directives = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_SEND].hdlr != NULL
2629 				 && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DIRECTIVE_RECEIVE].hdlr != NULL;
2630 	cdata->oacs.device_self_test =
2631 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_DEVICE_SELF_TEST].hdlr != NULL;
2632 	cdata->oacs.ns_manage = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_MANAGEMENT].hdlr != NULL
2633 				&& g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_NS_ATTACHMENT].hdlr != NULL;
2634 	cdata->oacs.firmware = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD].hdlr !=
2635 			       NULL
2636 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FIRMWARE_COMMIT].hdlr != NULL;
2637 	cdata->oacs.format =
2638 		g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_FORMAT_NVM].hdlr != NULL;
2639 	cdata->oacs.security = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_SEND].hdlr != NULL
2640 			       && g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_SECURITY_RECEIVE].hdlr != NULL;
2641 	cdata->oacs.get_lba_status = g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_GET_LBA_STATUS].hdlr !=
2642 				     NULL;
2643 }
2644 
2645 int
2646 spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
2647 {
2648 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2649 	struct spdk_nvmf_transport *transport;
2650 
2651 	/*
2652 	 * Common fields for discovery and NVM subsystems
2653 	 */
2654 	assert(ctrlr->admin_qpair);
2655 	transport = ctrlr->admin_qpair->transport;
2656 	spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
2657 	assert((transport->opts.max_io_size % 4096) == 0);
2658 	cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);
2659 	cdata->cntlid = ctrlr->cntlid;
2660 	cdata->ver = ctrlr->vcprop.vs;
2661 	cdata->aerl = ctrlr->cdata.aerl;
2662 	cdata->lpa.edlp = 1;
2663 	cdata->elpe = 127;
2664 	cdata->maxcmd = transport->opts.max_queue_depth;
2665 	cdata->sgls = ctrlr->cdata.sgls;
2666 	cdata->fuses.compare_and_write = 1;
2667 	cdata->acwu = 0; /* ACWU is 0-based. */
2668 	if (subsystem->flags.ana_reporting) {
2669 		cdata->mnan = subsystem->max_nsid;
2670 	}
2671 	spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0');
2672 
2673 	SPDK_DEBUGLOG(nvmf, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd);
2674 	SPDK_DEBUGLOG(nvmf, "sgls data: 0x%x\n", from_le32(&cdata->sgls));
2675 
2676 
2677 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
2678 		/*
2679 		 * NVM Discovery subsystem fields
2680 		 */
2681 		cdata->oaes.discovery_log_change_notices = 1;
2682 	} else {
2683 		cdata->vid = ctrlr->cdata.vid;
2684 		cdata->ssvid = ctrlr->cdata.ssvid;
2685 		cdata->ieee[0] = ctrlr->cdata.ieee[0];
2686 		cdata->ieee[1] = ctrlr->cdata.ieee[1];
2687 		cdata->ieee[2] = ctrlr->cdata.ieee[2];
2688 
2689 		/*
2690 		 * NVM subsystem fields (reserved for discovery subsystems)
2691 		 */
2692 		spdk_strcpy_pad(cdata->mn, spdk_nvmf_subsystem_get_mn(subsystem), sizeof(cdata->mn), ' ');
2693 		spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
2694 		cdata->kas = ctrlr->cdata.kas;
2695 
2696 		cdata->rab = 6;
2697 		cdata->cmic.multi_port = 1;
2698 		cdata->cmic.multi_ctrlr = 1;
2699 		cdata->oaes.ns_attribute_notices = 1;
2700 		cdata->ctratt.host_id_exhid_supported = 1;
2701 		/* We do not have any actual limitation to the number of abort commands.
2702 		 * We follow the recommendation by the NVMe specification.
2703 		 */
2704 		cdata->acl = NVMF_ABORT_COMMAND_LIMIT;
2705 		cdata->frmw.slot1_ro = 1;
2706 		cdata->frmw.num_slots = 1;
2707 
2708 		cdata->lpa.celp = 1; /* Command Effects log page supported */
2709 
2710 		cdata->sqes.min = 6;
2711 		cdata->sqes.max = 6;
2712 		cdata->cqes.min = 4;
2713 		cdata->cqes.max = 4;
2714 		cdata->nn = subsystem->max_nsid;
2715 		cdata->vwc.present = 1;
2716 		cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED;
2717 
2718 		cdata->nvmf_specific = ctrlr->cdata.nvmf_specific;
2719 
2720 		cdata->oncs.dsm = nvmf_ctrlr_dsm_supported(ctrlr);
2721 		cdata->oncs.write_zeroes = nvmf_ctrlr_write_zeroes_supported(ctrlr);
2722 		cdata->oncs.reservations = ctrlr->cdata.oncs.reservations;
2723 		if (subsystem->flags.ana_reporting) {
2724 			/* Asymmetric Namespace Access Reporting is supported. */
2725 			cdata->cmic.ana_reporting = 1;
2726 			cdata->oaes.ana_change_notices = 1;
2727 
2728 			cdata->anatt = ANA_TRANSITION_TIME_IN_SEC;
2729 			/* ANA Change state is not used, and ANA Persistent Loss state
2730 			 * is not supported for now.
2731 			 */
2732 			cdata->anacap.ana_optimized_state = 1;
2733 			cdata->anacap.ana_non_optimized_state = 1;
2734 			cdata->anacap.ana_inaccessible_state = 1;
2735 			/* ANAGRPID does not change while namespace is attached to controller */
2736 			cdata->anacap.no_change_anagrpid = 1;
2737 			cdata->anagrpmax = subsystem->max_nsid;
2738 			cdata->nanagrpid = subsystem->max_nsid;
2739 		}
2740 
2741 		nvmf_ctrlr_populate_oacs(ctrlr, cdata);
2742 
2743 		assert(subsystem->tgt != NULL);
2744 		cdata->crdt[0] = subsystem->tgt->crdt[0];
2745 		cdata->crdt[1] = subsystem->tgt->crdt[1];
2746 		cdata->crdt[2] = subsystem->tgt->crdt[2];
2747 
2748 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ioccsz 0x%x\n",
2749 			      cdata->nvmf_specific.ioccsz);
2750 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: iorcsz 0x%x\n",
2751 			      cdata->nvmf_specific.iorcsz);
2752 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: icdoff 0x%x\n",
2753 			      cdata->nvmf_specific.icdoff);
2754 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: ctrattr 0x%x\n",
2755 			      *(uint8_t *)&cdata->nvmf_specific.ctrattr);
2756 		SPDK_DEBUGLOG(nvmf, "ext ctrlr data: msdbd 0x%x\n",
2757 			      cdata->nvmf_specific.msdbd);
2758 	}
2759 
2760 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2761 }
2762 
2763 static int
2764 nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem,
2765 				   struct spdk_nvme_cmd *cmd,
2766 				   struct spdk_nvme_cpl *rsp,
2767 				   struct spdk_nvme_ns_list *ns_list)
2768 {
2769 	struct spdk_nvmf_ns *ns;
2770 	uint32_t count = 0;
2771 
2772 	if (cmd->nsid >= 0xfffffffeUL) {
2773 		SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid);
2774 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2775 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2776 	}
2777 
2778 	memset(ns_list, 0, sizeof(*ns_list));
2779 
2780 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
2781 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
2782 		if (ns->opts.nsid <= cmd->nsid) {
2783 			continue;
2784 		}
2785 
2786 		ns_list->ns_list[count++] = ns->opts.nsid;
2787 		if (count == SPDK_COUNTOF(ns_list->ns_list)) {
2788 			break;
2789 		}
2790 	}
2791 
2792 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2793 }
2794 
2795 static void
2796 _add_ns_id_desc(void **buf_ptr, size_t *buf_remain,
2797 		enum spdk_nvme_nidt type,
2798 		const void *data, size_t data_size)
2799 {
2800 	struct spdk_nvme_ns_id_desc *desc;
2801 	size_t desc_size = sizeof(*desc) + data_size;
2802 
2803 	/*
2804 	 * These should never fail in practice, since all valid NS ID descriptors
2805 	 * should be defined so that they fit in the available 4096-byte buffer.
2806 	 */
2807 	assert(data_size > 0);
2808 	assert(data_size <= UINT8_MAX);
2809 	assert(desc_size < *buf_remain);
2810 	if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) {
2811 		return;
2812 	}
2813 
2814 	desc = *buf_ptr;
2815 	desc->nidt = type;
2816 	desc->nidl = data_size;
2817 	memcpy(desc->nid, data, data_size);
2818 
2819 	*buf_ptr += desc_size;
2820 	*buf_remain -= desc_size;
2821 }
2822 
2823 static int
2824 nvmf_ctrlr_identify_ns_id_descriptor_list(
2825 	struct spdk_nvmf_subsystem *subsystem,
2826 	struct spdk_nvme_cmd *cmd,
2827 	struct spdk_nvme_cpl *rsp,
2828 	void *id_desc_list, size_t id_desc_list_size)
2829 {
2830 	struct spdk_nvmf_ns *ns;
2831 	size_t buf_remain = id_desc_list_size;
2832 	void *buf_ptr = id_desc_list;
2833 
2834 	ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
2835 	if (ns == NULL || ns->bdev == NULL) {
2836 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2837 		rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
2838 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2839 	}
2840 
2841 #define ADD_ID_DESC(type, data, size) \
2842 	do { \
2843 		if (!spdk_mem_all_zero(data, size)) { \
2844 			_add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \
2845 		} \
2846 	} while (0)
2847 
2848 	ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64));
2849 	ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid));
2850 	ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid));
2851 
2852 	/*
2853 	 * The list is automatically 0-terminated because controller to host buffers in
2854 	 * admin commands always get zeroed in nvmf_ctrlr_process_admin_cmd().
2855 	 */
2856 
2857 #undef ADD_ID_DESC
2858 
2859 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2860 }
2861 
2862 static int
2863 nvmf_ctrlr_identify(struct spdk_nvmf_request *req)
2864 {
2865 	uint8_t cns;
2866 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2867 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2868 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
2869 	struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
2870 
2871 	if (req->data == NULL || req->length < 4096) {
2872 		SPDK_DEBUGLOG(nvmf, "identify command with invalid buffer\n");
2873 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2874 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2875 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2876 	}
2877 
2878 	cns = cmd->cdw10_bits.identify.cns;
2879 
2880 	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY &&
2881 	    cns != SPDK_NVME_IDENTIFY_CTRLR) {
2882 		/* Discovery controllers only support Identify Controller */
2883 		goto invalid_cns;
2884 	}
2885 
2886 	switch (cns) {
2887 	case SPDK_NVME_IDENTIFY_NS:
2888 		return spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, req->data);
2889 	case SPDK_NVME_IDENTIFY_CTRLR:
2890 		return spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, req->data);
2891 	case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST:
2892 		return nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, req->data);
2893 	case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST:
2894 		return nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp, req->data, req->length);
2895 	default:
2896 		goto invalid_cns;
2897 	}
2898 
2899 invalid_cns:
2900 	SPDK_INFOLOG(nvmf, "Identify command with unsupported CNS 0x%02x\n", cns);
2901 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
2902 	rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
2903 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
2904 }
2905 
2906 static bool
2907 nvmf_qpair_abort_aer(struct spdk_nvmf_qpair *qpair, uint16_t cid)
2908 {
2909 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
2910 	struct spdk_nvmf_request *req;
2911 	int i;
2912 
2913 	if (!nvmf_qpair_is_admin_queue(qpair)) {
2914 		return false;
2915 	}
2916 
2917 	assert(spdk_get_thread() == ctrlr->thread);
2918 
2919 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
2920 		if (ctrlr->aer_req[i]->cmd->nvme_cmd.cid == cid) {
2921 			SPDK_DEBUGLOG(nvmf, "Aborting AER request\n");
2922 			req = ctrlr->aer_req[i];
2923 			ctrlr->aer_req[i] = NULL;
2924 			ctrlr->nr_aer_reqs--;
2925 
2926 			/* Move the last req to the aborting position for making aer_reqs
2927 			 * in continuous
2928 			 */
2929 			if (i < ctrlr->nr_aer_reqs) {
2930 				ctrlr->aer_req[i] = ctrlr->aer_req[ctrlr->nr_aer_reqs];
2931 				ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
2932 			}
2933 
2934 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2935 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
2936 			_nvmf_request_complete(req);
2937 			return true;
2938 		}
2939 	}
2940 
2941 	return false;
2942 }
2943 
2944 void
2945 nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair)
2946 {
2947 	struct spdk_nvmf_request *req, *tmp;
2948 
2949 	TAILQ_FOREACH_SAFE(req, &qpair->outstanding, link, tmp) {
2950 		if (req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE) {
2951 			/* Zero-copy requests are kept on the outstanding queue from the moment
2952 			 * zcopy_start is sent until a zcopy_end callback is received.  Therefore,
2953 			 * we can't remove them from the outstanding queue here, but need to rely on
2954 			 * the transport to do a zcopy_end to release their buffers and, in turn,
2955 			 * remove them from the queue.
2956 			 */
2957 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2958 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
2959 			nvmf_transport_req_free(req);
2960 		}
2961 	}
2962 }
2963 
2964 static void
2965 nvmf_qpair_abort_request(struct spdk_nvmf_qpair *qpair, struct spdk_nvmf_request *req)
2966 {
2967 	uint16_t cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
2968 
2969 	if (nvmf_qpair_abort_aer(qpair, cid)) {
2970 		SPDK_DEBUGLOG(nvmf, "abort ctrlr=%p sqid=%u cid=%u successful\n",
2971 			      qpair->ctrlr, qpair->qid, cid);
2972 		req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command successfully aborted */
2973 
2974 		spdk_nvmf_request_complete(req);
2975 		return;
2976 	}
2977 
2978 	nvmf_transport_qpair_abort_request(qpair, req);
2979 }
2980 
/*
 * Completion callback of the poll group iteration started by
 * nvmf_ctrlr_abort().
 *
 * A status of 0 means no poll group owned a qpair matching the SQID of the
 * Abort command, so the Abort request is completed here.  With any other
 * status nothing is done.
 */
static void
nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_nvmf_request *abort_req = spdk_io_channel_iter_get_ctx(i);

	if (status != 0) {
		return;
	}

	_nvmf_request_complete(abort_req);
}
2993 
2994 static void
2995 nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i)
2996 {
2997 	struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i);
2998 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2999 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
3000 	uint16_t sqid = req->cmd->nvme_cmd.cdw10_bits.abort.sqid;
3001 	struct spdk_nvmf_qpair *qpair;
3002 
3003 	TAILQ_FOREACH(qpair, &group->qpairs, link) {
3004 		if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) {
3005 			/* Found the qpair */
3006 
3007 			nvmf_qpair_abort_request(qpair, req);
3008 
3009 			/* Return -1 for the status so the iteration across threads stops. */
3010 			spdk_for_each_channel_continue(i, -1);
3011 			return;
3012 		}
3013 	}
3014 
3015 	spdk_for_each_channel_continue(i, 0);
3016 }
3017 
3018 static int
3019 nvmf_ctrlr_abort(struct spdk_nvmf_request *req)
3020 {
3021 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3022 
3023 	rsp->cdw0 = 1U; /* Command not aborted */
3024 	rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3025 	rsp->status.sc = SPDK_NVME_SC_SUCCESS;
3026 
3027 	/* Send a message to each poll group, searching for this ctrlr, sqid, and command. */
3028 	spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt,
3029 			      nvmf_ctrlr_abort_on_pg,
3030 			      req,
3031 			      nvmf_ctrlr_abort_done
3032 			     );
3033 
3034 	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
3035 }
3036 
3037 int
3038 nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req)
3039 {
3040 	struct spdk_nvmf_request *req_to_abort = req->req_to_abort;
3041 	struct spdk_bdev *bdev;
3042 	struct spdk_bdev_desc *desc;
3043 	struct spdk_io_channel *ch;
3044 	int rc;
3045 
3046 	assert(req_to_abort != NULL);
3047 
3048 	if (g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr &&
3049 	    nvmf_qpair_is_admin_queue(req_to_abort->qpair)) {
3050 		return g_nvmf_custom_admin_cmd_hdlrs[SPDK_NVME_OPC_ABORT].hdlr(req);
3051 	}
3052 
3053 	rc = spdk_nvmf_request_get_bdev(req_to_abort->cmd->nvme_cmd.nsid, req_to_abort,
3054 					&bdev, &desc, &ch);
3055 	if (rc != 0) {
3056 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3057 	}
3058 
3059 	return spdk_nvmf_bdev_ctrlr_abort_cmd(bdev, desc, ch, req, req_to_abort);
3060 }
3061 
3062 static int
3063 get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0)
3064 {
3065 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3066 
3067 	rsp->cdw0 = cdw0;
3068 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3069 }
3070 
3071 /* we have to use the typedef in the function declaration to appease astyle. */
3072 typedef enum spdk_nvme_path_status_code spdk_nvme_path_status_code_t;
3073 
3074 static spdk_nvme_path_status_code_t
3075 _nvme_ana_state_to_path_status(enum spdk_nvme_ana_state ana_state)
3076 {
3077 	switch (ana_state) {
3078 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3079 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE;
3080 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3081 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3082 	case SPDK_NVME_ANA_CHANGE_STATE:
3083 		return SPDK_NVME_SC_ASYMMETRIC_ACCESS_TRANSITION;
3084 	default:
3085 		return SPDK_NVME_SC_INTERNAL_PATH_ERROR;
3086 	}
3087 }
3088 
3089 static int
3090 nvmf_ctrlr_get_features(struct spdk_nvmf_request *req)
3091 {
3092 	uint8_t feature;
3093 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3094 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3095 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3096 	enum spdk_nvme_ana_state ana_state;
3097 
3098 	feature = cmd->cdw10_bits.get_features.fid;
3099 
3100 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3101 		/*
3102 		 * Features supported by Discovery controller
3103 		 */
3104 		switch (feature) {
3105 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3106 			return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3107 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3108 			return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3109 		default:
3110 			SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3111 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3112 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3113 		}
3114 	}
3115 	/*
3116 	 * Process Get Features command for non-discovery controller
3117 	 */
3118 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3119 	switch (ana_state) {
3120 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3121 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3122 	case SPDK_NVME_ANA_CHANGE_STATE:
3123 		switch (feature) {
3124 		case SPDK_NVME_FEAT_ERROR_RECOVERY:
3125 		case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3126 		case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3127 		case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3128 			response->status.sct = SPDK_NVME_SCT_PATH;
3129 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3130 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3131 		default:
3132 			break;
3133 		}
3134 		break;
3135 	default:
3136 		break;
3137 	}
3138 
3139 	switch (feature) {
3140 	case SPDK_NVME_FEAT_ARBITRATION:
3141 		return get_features_generic(req, ctrlr->feat.arbitration.raw);
3142 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3143 		return get_features_generic(req, ctrlr->feat.power_management.raw);
3144 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3145 		return nvmf_ctrlr_get_features_temperature_threshold(req);
3146 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3147 		return get_features_generic(req, ctrlr->feat.error_recovery.raw);
3148 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3149 		return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw);
3150 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3151 		return get_features_generic(req, ctrlr->feat.number_of_queues.raw);
3152 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3153 		return get_features_generic(req, ctrlr->feat.interrupt_coalescing.raw);
3154 	case SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
3155 		return nvmf_ctrlr_get_features_interrupt_vector_configuration(req);
3156 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3157 		return get_features_generic(req, ctrlr->feat.write_atomicity.raw);
3158 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3159 		return get_features_generic(req, ctrlr->feat.async_event_configuration.raw);
3160 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3161 		return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw);
3162 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3163 		return nvmf_ctrlr_get_features_host_identifier(req);
3164 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3165 		return nvmf_ctrlr_get_features_reservation_notification_mask(req);
3166 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3167 		return nvmf_ctrlr_get_features_reservation_persistence(req);
3168 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3169 		return nvmf_ctrlr_get_features_host_behavior_support(req);
3170 	default:
3171 		SPDK_INFOLOG(nvmf, "Get Features command with unsupported feature ID 0x%02x\n", feature);
3172 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3173 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3174 	}
3175 }
3176 
3177 static int
3178 nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
3179 {
3180 	uint8_t feature, save;
3181 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3182 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3183 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3184 	enum spdk_nvme_ana_state ana_state;
3185 	/*
3186 	 * Features are not saveable by the controller as indicated by
3187 	 * ONCS field of the Identify Controller data.
3188 	 * */
3189 	save = cmd->cdw10_bits.set_features.sv;
3190 	if (save) {
3191 		response->status.sc = SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE;
3192 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3193 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3194 	}
3195 
3196 	feature = cmd->cdw10_bits.set_features.fid;
3197 
3198 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3199 		/*
3200 		 * Features supported by Discovery controller
3201 		 */
3202 		switch (feature) {
3203 		case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3204 			return nvmf_ctrlr_set_features_keep_alive_timer(req);
3205 		case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3206 			return nvmf_ctrlr_set_features_async_event_configuration(req);
3207 		default:
3208 			SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3209 			response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3210 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3211 		}
3212 	}
3213 	/*
3214 	 * Process Set Features command for non-discovery controller
3215 	 */
3216 	ana_state = nvmf_ctrlr_get_ana_state_from_nsid(ctrlr, cmd->nsid);
3217 	switch (ana_state) {
3218 	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3219 	case SPDK_NVME_ANA_CHANGE_STATE:
3220 		if (cmd->nsid == SPDK_NVME_GLOBAL_NS_TAG) {
3221 			response->status.sct = SPDK_NVME_SCT_PATH;
3222 			response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3223 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3224 		} else {
3225 			switch (feature) {
3226 			case SPDK_NVME_FEAT_ERROR_RECOVERY:
3227 			case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3228 			case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3229 			case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3230 				response->status.sct = SPDK_NVME_SCT_PATH;
3231 				response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3232 				return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3233 			default:
3234 				break;
3235 			}
3236 		}
3237 		break;
3238 	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3239 		response->status.sct = SPDK_NVME_SCT_PATH;
3240 		response->status.sc = SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS;
3241 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3242 	default:
3243 		break;
3244 	}
3245 
3246 	switch (feature) {
3247 	case SPDK_NVME_FEAT_ARBITRATION:
3248 		return nvmf_ctrlr_set_features_arbitration(req);
3249 	case SPDK_NVME_FEAT_POWER_MANAGEMENT:
3250 		return nvmf_ctrlr_set_features_power_management(req);
3251 	case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD:
3252 		return nvmf_ctrlr_set_features_temperature_threshold(req);
3253 	case SPDK_NVME_FEAT_ERROR_RECOVERY:
3254 		return nvmf_ctrlr_set_features_error_recovery(req);
3255 	case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE:
3256 		return nvmf_ctrlr_set_features_volatile_write_cache(req);
3257 	case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
3258 		return nvmf_ctrlr_set_features_number_of_queues(req);
3259 	case SPDK_NVME_FEAT_INTERRUPT_COALESCING:
3260 		response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
3261 		response->status.sc = SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE;
3262 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3263 	case SPDK_NVME_FEAT_WRITE_ATOMICITY:
3264 		return nvmf_ctrlr_set_features_write_atomicity(req);
3265 	case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
3266 		return nvmf_ctrlr_set_features_async_event_configuration(req);
3267 	case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER:
3268 		return nvmf_ctrlr_set_features_keep_alive_timer(req);
3269 	case SPDK_NVME_FEAT_HOST_IDENTIFIER:
3270 		return nvmf_ctrlr_set_features_host_identifier(req);
3271 	case SPDK_NVME_FEAT_HOST_RESERVE_MASK:
3272 		return nvmf_ctrlr_set_features_reservation_notification_mask(req);
3273 	case SPDK_NVME_FEAT_HOST_RESERVE_PERSIST:
3274 		return nvmf_ctrlr_set_features_reservation_persistence(req);
3275 	case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
3276 		return nvmf_ctrlr_set_features_host_behavior_support(req);
3277 	default:
3278 		SPDK_INFOLOG(nvmf, "Set Features command with unsupported feature ID 0x%02x\n", feature);
3279 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3280 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3281 	}
3282 }
3283 
3284 static int
3285 nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
3286 {
3287 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3288 
3289 	SPDK_DEBUGLOG(nvmf, "Keep Alive\n");
3290 	/*
3291 	 * To handle keep alive just clear or reset the
3292 	 * ctrlr based keep alive duration counter.
3293 	 * When added, a separate timer based process
3294 	 * will monitor if the time since last recorded
3295 	 * keep alive has exceeded the max duration and
3296 	 * take appropriate action.
3297 	 */
3298 	ctrlr->last_keep_alive_tick = spdk_get_ticks();
3299 
3300 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3301 }
3302 
3303 int
3304 nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
3305 {
3306 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3307 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3308 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3309 	int rc;
3310 
3311 	if (ctrlr == NULL) {
3312 		SPDK_ERRLOG("Admin command sent before CONNECT\n");
3313 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3314 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3315 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3316 	}
3317 
3318 	assert(spdk_get_thread() == ctrlr->thread);
3319 
3320 	if (cmd->fuse != 0) {
3321 		/* Fused admin commands are not supported. */
3322 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3323 		response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3324 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3325 	}
3326 
3327 	if (ctrlr->vcprop.cc.bits.en != 1) {
3328 		SPDK_ERRLOG("Admin command sent to disabled controller\n");
3329 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3330 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3331 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3332 	}
3333 
3334 	if (req->data && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
3335 		_clear_iovs(req->iov, req->iovcnt);
3336 	}
3337 
3338 	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
3339 		/* Discovery controllers only support these admin OPS. */
3340 		switch (cmd->opc) {
3341 		case SPDK_NVME_OPC_IDENTIFY:
3342 		case SPDK_NVME_OPC_GET_LOG_PAGE:
3343 		case SPDK_NVME_OPC_KEEP_ALIVE:
3344 		case SPDK_NVME_OPC_SET_FEATURES:
3345 		case SPDK_NVME_OPC_GET_FEATURES:
3346 		case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3347 			break;
3348 		default:
3349 			goto invalid_opcode;
3350 		}
3351 	}
3352 
3353 	/* Call a custom adm cmd handler if set. Aborts are handled in a different path (see nvmf_passthru_admin_cmd) */
3354 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) {
3355 		rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req);
3356 		if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3357 			/* The handler took care of this command */
3358 			return rc;
3359 		}
3360 	}
3361 
3362 	switch (cmd->opc) {
3363 	case SPDK_NVME_OPC_GET_LOG_PAGE:
3364 		return nvmf_ctrlr_get_log_page(req);
3365 	case SPDK_NVME_OPC_IDENTIFY:
3366 		return nvmf_ctrlr_identify(req);
3367 	case SPDK_NVME_OPC_ABORT:
3368 		return nvmf_ctrlr_abort(req);
3369 	case SPDK_NVME_OPC_GET_FEATURES:
3370 		return nvmf_ctrlr_get_features(req);
3371 	case SPDK_NVME_OPC_SET_FEATURES:
3372 		return nvmf_ctrlr_set_features(req);
3373 	case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
3374 		return nvmf_ctrlr_async_event_request(req);
3375 	case SPDK_NVME_OPC_KEEP_ALIVE:
3376 		return nvmf_ctrlr_keep_alive(req);
3377 
3378 	case SPDK_NVME_OPC_CREATE_IO_SQ:
3379 	case SPDK_NVME_OPC_CREATE_IO_CQ:
3380 	case SPDK_NVME_OPC_DELETE_IO_SQ:
3381 	case SPDK_NVME_OPC_DELETE_IO_CQ:
3382 		/* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
3383 		goto invalid_opcode;
3384 
3385 	default:
3386 		goto invalid_opcode;
3387 	}
3388 
3389 invalid_opcode:
3390 	SPDK_INFOLOG(nvmf, "Unsupported admin opcode 0x%x\n", cmd->opc);
3391 	response->status.sct = SPDK_NVME_SCT_GENERIC;
3392 	response->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3393 	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3394 }
3395 
3396 static int
3397 nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
3398 {
3399 	struct spdk_nvmf_qpair *qpair = req->qpair;
3400 	struct spdk_nvmf_capsule_cmd *cap_hdr;
3401 
3402 	cap_hdr = &req->cmd->nvmf_cmd;
3403 
3404 	if (qpair->ctrlr == NULL) {
3405 		/* No ctrlr established yet; the only valid command is Connect */
3406 		if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
3407 			return nvmf_ctrlr_cmd_connect(req);
3408 		} else {
3409 			SPDK_DEBUGLOG(nvmf, "Got fctype 0x%x, expected Connect\n",
3410 				      cap_hdr->fctype);
3411 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3412 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3413 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3414 		}
3415 	} else if (nvmf_qpair_is_admin_queue(qpair)) {
3416 		/*
3417 		 * Controller session is established, and this is an admin queue.
3418 		 * Disallow Connect and allow other fabrics commands.
3419 		 */
3420 		switch (cap_hdr->fctype) {
3421 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
3422 			return nvmf_property_set(req);
3423 		case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
3424 			return nvmf_property_get(req);
3425 		default:
3426 			SPDK_DEBUGLOG(nvmf, "unknown fctype 0x%02x\n",
3427 				      cap_hdr->fctype);
3428 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3429 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3430 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3431 		}
3432 	} else {
3433 		/* Controller session is established, and this is an I/O queue */
3434 		/* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
3435 		SPDK_DEBUGLOG(nvmf, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype);
3436 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3437 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3438 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3439 	}
3440 }
3441 
3442 static inline void
3443 nvmf_ctrlr_queue_pending_async_event(struct spdk_nvmf_ctrlr *ctrlr,
3444 				     union spdk_nvme_async_event_completion *event)
3445 {
3446 	struct spdk_nvmf_async_event_completion *nvmf_event;
3447 
3448 	nvmf_event = calloc(1, sizeof(*nvmf_event));
3449 	if (!nvmf_event) {
3450 		SPDK_ERRLOG("Alloc nvmf event failed, ignore the event\n");
3451 		return;
3452 	}
3453 	nvmf_event->event.raw = event->raw;
3454 	STAILQ_INSERT_TAIL(&ctrlr->async_events, nvmf_event, link);
3455 }
3456 
3457 static inline int
3458 nvmf_ctrlr_async_event_notification(struct spdk_nvmf_ctrlr *ctrlr,
3459 				    union spdk_nvme_async_event_completion *event)
3460 {
3461 	struct spdk_nvmf_request *req;
3462 	struct spdk_nvme_cpl *rsp;
3463 
3464 	assert(spdk_get_thread() == ctrlr->thread);
3465 
3466 	/* If there is no outstanding AER request, queue the event.  Then
3467 	 * if an AER is later submitted, this event can be sent as a
3468 	 * response.
3469 	 */
3470 	if (ctrlr->nr_aer_reqs == 0) {
3471 		nvmf_ctrlr_queue_pending_async_event(ctrlr, event);
3472 		return 0;
3473 	}
3474 
3475 	req = ctrlr->aer_req[--ctrlr->nr_aer_reqs];
3476 	rsp = &req->rsp->nvme_cpl;
3477 
3478 	rsp->cdw0 = event->raw;
3479 
3480 	_nvmf_request_complete(req);
3481 	ctrlr->aer_req[ctrlr->nr_aer_reqs] = NULL;
3482 
3483 	return 0;
3484 }
3485 
3486 int
3487 nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
3488 {
3489 	union spdk_nvme_async_event_completion event = {0};
3490 
3491 	/* Users may disable the event notification */
3492 	if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) {
3493 		return 0;
3494 	}
3495 
3496 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGE_MASK_BIT)) {
3497 		return 0;
3498 	}
3499 
3500 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3501 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED;
3502 	event.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST;
3503 
3504 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3505 }
3506 
3507 int
3508 nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr)
3509 {
3510 	union spdk_nvme_async_event_completion event = {0};
3511 
3512 	/* Users may disable the event notification */
3513 	if (!ctrlr->feat.async_event_configuration.bits.ana_change_notice) {
3514 		return 0;
3515 	}
3516 
3517 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ANA_CHANGE_MASK_BIT)) {
3518 		return 0;
3519 	}
3520 
3521 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3522 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_ANA_CHANGE;
3523 	event.bits.log_page_identifier = SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS;
3524 
3525 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3526 }
3527 
3528 void
3529 nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr)
3530 {
3531 	union spdk_nvme_async_event_completion event = {0};
3532 
3533 	if (!ctrlr->num_avail_log_pages) {
3534 		return;
3535 	}
3536 
3537 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL_MASK_BIT)) {
3538 		return;
3539 	}
3540 
3541 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_IO;
3542 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_RESERVATION_LOG_AVAIL;
3543 	event.bits.log_page_identifier = SPDK_NVME_LOG_RESERVATION_NOTIFICATION;
3544 
3545 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3546 }
3547 
3548 void
3549 nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx)
3550 {
3551 	union spdk_nvme_async_event_completion event = {0};
3552 	struct spdk_nvmf_ctrlr *ctrlr = ctx;
3553 
3554 	/* Users may disable the event notification manually or
3555 	 * it may not be enabled due to keep alive timeout
3556 	 * not being set in connect command to discovery controller.
3557 	 */
3558 	if (!ctrlr->feat.async_event_configuration.bits.discovery_log_change_notice) {
3559 		return;
3560 	}
3561 
3562 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE_MASK_BIT)) {
3563 		return;
3564 	}
3565 
3566 	event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE;
3567 	event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_DISCOVERY_LOG_CHANGE;
3568 	event.bits.log_page_identifier = SPDK_NVME_LOG_DISCOVERY;
3569 
3570 	nvmf_ctrlr_async_event_notification(ctrlr, &event);
3571 }
3572 
3573 int
3574 nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
3575 				   union spdk_nvme_async_event_completion event)
3576 {
3577 	if (!nvmf_ctrlr_mask_aen(ctrlr, SPDK_NVME_ASYNC_EVENT_ERROR_MASK_BIT)) {
3578 		return 0;
3579 	}
3580 
3581 	if (event.bits.async_event_type != SPDK_NVME_ASYNC_EVENT_TYPE_ERROR ||
3582 	    event.bits.async_event_info > SPDK_NVME_ASYNC_EVENT_FW_IMAGE_LOAD) {
3583 		return 0;
3584 	}
3585 
3586 	return nvmf_ctrlr_async_event_notification(ctrlr, &event);
3587 }
3588 
3589 void
3590 nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair)
3591 {
3592 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
3593 	int i;
3594 
3595 	if (!nvmf_qpair_is_admin_queue(qpair)) {
3596 		return;
3597 	}
3598 
3599 	assert(spdk_get_thread() == ctrlr->thread);
3600 
3601 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3602 		spdk_nvmf_request_free(ctrlr->aer_req[i]);
3603 		ctrlr->aer_req[i] = NULL;
3604 	}
3605 
3606 	ctrlr->nr_aer_reqs = 0;
3607 }
3608 
3609 void
3610 nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr)
3611 {
3612 	struct spdk_nvmf_request *req;
3613 	int i;
3614 
3615 	assert(spdk_get_thread() == ctrlr->thread);
3616 
3617 	if (!ctrlr->nr_aer_reqs) {
3618 		return;
3619 	}
3620 
3621 	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
3622 		req = ctrlr->aer_req[i];
3623 
3624 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3625 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
3626 		_nvmf_request_complete(req);
3627 
3628 		ctrlr->aer_req[i] = NULL;
3629 	}
3630 
3631 	ctrlr->nr_aer_reqs = 0;
3632 }
3633 
3634 static void
3635 _nvmf_ctrlr_add_reservation_log(void *ctx)
3636 {
3637 	struct spdk_nvmf_reservation_log *log = (struct spdk_nvmf_reservation_log *)ctx;
3638 	struct spdk_nvmf_ctrlr *ctrlr = log->ctrlr;
3639 
3640 	ctrlr->log_page_count++;
3641 
3642 	/* Maximum number of queued log pages is 255 */
3643 	if (ctrlr->num_avail_log_pages == 0xff) {
3644 		struct spdk_nvmf_reservation_log *entry;
3645 		entry = TAILQ_LAST(&ctrlr->log_head, log_page_head);
3646 		entry->log.log_page_count = ctrlr->log_page_count;
3647 		free(log);
3648 		return;
3649 	}
3650 
3651 	log->log.log_page_count = ctrlr->log_page_count;
3652 	log->log.num_avail_log_pages = ctrlr->num_avail_log_pages++;
3653 	TAILQ_INSERT_TAIL(&ctrlr->log_head, log, link);
3654 
3655 	nvmf_ctrlr_async_event_reservation_notification(ctrlr);
3656 }
3657 
3658 void
3659 nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
3660 				  struct spdk_nvmf_ns *ns,
3661 				  enum spdk_nvme_reservation_notification_log_page_type type)
3662 {
3663 	struct spdk_nvmf_reservation_log *log;
3664 
3665 	switch (type) {
3666 	case SPDK_NVME_RESERVATION_LOG_PAGE_EMPTY:
3667 		return;
3668 	case SPDK_NVME_REGISTRATION_PREEMPTED:
3669 		if (ns->mask & SPDK_NVME_REGISTRATION_PREEMPTED_MASK) {
3670 			return;
3671 		}
3672 		break;
3673 	case SPDK_NVME_RESERVATION_RELEASED:
3674 		if (ns->mask & SPDK_NVME_RESERVATION_RELEASED_MASK) {
3675 			return;
3676 		}
3677 		break;
3678 	case SPDK_NVME_RESERVATION_PREEMPTED:
3679 		if (ns->mask & SPDK_NVME_RESERVATION_PREEMPTED_MASK) {
3680 			return;
3681 		}
3682 		break;
3683 	default:
3684 		return;
3685 	}
3686 
3687 	log = calloc(1, sizeof(*log));
3688 	if (!log) {
3689 		SPDK_ERRLOG("Alloc log page failed, ignore the log\n");
3690 		return;
3691 	}
3692 	log->ctrlr = ctrlr;
3693 	log->log.type = type;
3694 	log->log.nsid = ns->nsid;
3695 
3696 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_add_reservation_log, log);
3697 }
3698 
3699 /* Check from subsystem poll group's namespace information data structure */
3700 static bool
3701 nvmf_ns_info_ctrlr_is_registrant(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3702 				 struct spdk_nvmf_ctrlr *ctrlr)
3703 {
3704 	uint32_t i;
3705 
3706 	for (i = 0; i < SPDK_NVMF_MAX_NUM_REGISTRANTS; i++) {
3707 		if (!spdk_uuid_compare(&ns_info->reg_hostid[i], &ctrlr->hostid)) {
3708 			return true;
3709 		}
3710 	}
3711 
3712 	return false;
3713 }
3714 
3715 /*
3716  * Check the NVMe command is permitted or not for current controller(Host).
3717  */
3718 static int
3719 nvmf_ns_reservation_request_check(struct spdk_nvmf_subsystem_pg_ns_info *ns_info,
3720 				  struct spdk_nvmf_ctrlr *ctrlr,
3721 				  struct spdk_nvmf_request *req)
3722 {
3723 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3724 	enum spdk_nvme_reservation_type rtype = ns_info->rtype;
3725 	uint8_t status = SPDK_NVME_SC_SUCCESS;
3726 	uint8_t racqa;
3727 	bool is_registrant;
3728 
3729 	/* No valid reservation */
3730 	if (!rtype) {
3731 		return 0;
3732 	}
3733 
3734 	is_registrant = nvmf_ns_info_ctrlr_is_registrant(ns_info, ctrlr);
3735 	/* All registrants type and current ctrlr is a valid registrant */
3736 	if ((rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
3737 	     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && is_registrant) {
3738 		return 0;
3739 	} else if (!spdk_uuid_compare(&ns_info->holder_id, &ctrlr->hostid)) {
3740 		return 0;
3741 	}
3742 
3743 	/* Non-holder for current controller */
3744 	switch (cmd->opc) {
3745 	case SPDK_NVME_OPC_READ:
3746 	case SPDK_NVME_OPC_COMPARE:
3747 		if (rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3748 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3749 			goto exit;
3750 		}
3751 		if ((rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY ||
3752 		     rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) && !is_registrant) {
3753 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3754 		}
3755 		break;
3756 	case SPDK_NVME_OPC_FLUSH:
3757 	case SPDK_NVME_OPC_WRITE:
3758 	case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
3759 	case SPDK_NVME_OPC_WRITE_ZEROES:
3760 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
3761 		if (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE ||
3762 		    rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
3763 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3764 			goto exit;
3765 		}
3766 		if (!is_registrant) {
3767 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3768 		}
3769 		break;
3770 	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
3771 		racqa = cmd->cdw10_bits.resv_acquire.racqa;
3772 		if (racqa == SPDK_NVME_RESERVE_ACQUIRE) {
3773 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3774 			goto exit;
3775 		}
3776 		if (!is_registrant) {
3777 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3778 		}
3779 		break;
3780 	case SPDK_NVME_OPC_RESERVATION_RELEASE:
3781 		if (!is_registrant) {
3782 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
3783 		}
3784 		break;
3785 	default:
3786 		break;
3787 	}
3788 
3789 exit:
3790 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3791 	req->rsp->nvme_cpl.status.sc = status;
3792 	if (status == SPDK_NVME_SC_RESERVATION_CONFLICT) {
3793 		return -EPERM;
3794 	}
3795 
3796 	return 0;
3797 }
3798 
3799 static int
3800 nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
3801 				struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
3802 {
3803 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3804 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
3805 	struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req;
3806 	int rc;
3807 
3808 	if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) {
3809 		/* first fused operation (should be compare) */
3810 		if (first_fused_req != NULL) {
3811 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
3812 
3813 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
3814 
3815 			/* abort req->qpair->first_fused_request and continue with new fused command */
3816 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
3817 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
3818 			_nvmf_request_complete(first_fused_req);
3819 		} else if (cmd->opc != SPDK_NVME_OPC_COMPARE) {
3820 			SPDK_ERRLOG("Wrong op code of fused operations\n");
3821 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3822 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3823 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3824 		}
3825 
3826 		req->qpair->first_fused_req = req;
3827 		return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
3828 	} else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) {
3829 		/* second fused operation (should be write) */
3830 		if (first_fused_req == NULL) {
3831 			SPDK_ERRLOG("Wrong sequence of fused operations\n");
3832 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3833 			rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
3834 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3835 		} else if (cmd->opc != SPDK_NVME_OPC_WRITE) {
3836 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
3837 
3838 			SPDK_ERRLOG("Wrong op code of fused operations\n");
3839 
3840 			/* abort req->qpair->first_fused_request and fail current command */
3841 			fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
3842 			fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
3843 			_nvmf_request_complete(first_fused_req);
3844 
3845 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3846 			rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
3847 			req->qpair->first_fused_req = NULL;
3848 			return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3849 		}
3850 
3851 		/* save request of first command to generate response later */
3852 		req->first_fused_req = first_fused_req;
3853 		req->qpair->first_fused_req = NULL;
3854 	} else {
3855 		SPDK_ERRLOG("Invalid fused command fuse field.\n");
3856 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3857 		rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
3858 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3859 	}
3860 
3861 	rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req);
3862 
3863 	if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
3864 		if (spdk_nvme_cpl_is_error(rsp)) {
3865 			struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
3866 
3867 			fused_response->status = rsp->status;
3868 			rsp->status.sct = SPDK_NVME_SCT_GENERIC;
3869 			rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
3870 			/* Complete first of fused commands. Second will be completed by upper layer */
3871 			_nvmf_request_complete(first_fused_req);
3872 			req->first_fused_req = NULL;
3873 		}
3874 	}
3875 
3876 	return rc;
3877 }
3878 
3879 bool
3880 nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req)
3881 {
3882 	struct spdk_nvmf_transport *transport = req->qpair->transport;
3883 	struct spdk_nvmf_ns *ns;
3884 
3885 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE);
3886 
3887 	if (!transport->opts.zcopy) {
3888 		return false;
3889 	}
3890 
3891 	if (nvmf_qpair_is_admin_queue(req->qpair)) {
3892 		/* Admin queue */
3893 		return false;
3894 	}
3895 
3896 	if ((req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_WRITE) &&
3897 	    (req->cmd->nvme_cmd.opc != SPDK_NVME_OPC_READ)) {
3898 		/* Not a READ or WRITE command */
3899 		return false;
3900 	}
3901 
3902 	if (req->cmd->nvme_cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
3903 		/* Fused commands dont use zcopy buffers */
3904 		return false;
3905 	}
3906 
3907 	ns = _nvmf_subsystem_get_ns(req->qpair->ctrlr->subsys, req->cmd->nvme_cmd.nsid);
3908 	if (ns == NULL || ns->bdev == NULL || !ns->zcopy) {
3909 		return false;
3910 	}
3911 
3912 	req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT;
3913 	return true;
3914 }
3915 
3916 void
3917 spdk_nvmf_request_zcopy_start(struct spdk_nvmf_request *req)
3918 {
3919 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
3920 
3921 	/* Set iovcnt to be the maximum number of iovs that the ZCOPY can use */
3922 	req->iovcnt = NVMF_REQ_MAX_BUFFERS;
3923 
3924 	spdk_nvmf_request_exec(req);
3925 }
3926 
3927 void
3928 spdk_nvmf_request_zcopy_end(struct spdk_nvmf_request *req, bool commit)
3929 {
3930 	assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_EXECUTE);
3931 	req->zcopy_phase = NVMF_ZCOPY_PHASE_END_PENDING;
3932 
3933 	nvmf_bdev_ctrlr_zcopy_end(req, commit);
3934 }
3935 
3936 int
3937 nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
3938 {
3939 	uint32_t nsid;
3940 	struct spdk_nvmf_ns *ns;
3941 	struct spdk_bdev *bdev;
3942 	struct spdk_bdev_desc *desc;
3943 	struct spdk_io_channel *ch;
3944 	struct spdk_nvmf_poll_group *group = req->qpair->group;
3945 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3946 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3947 	struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
3948 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
3949 	enum spdk_nvme_ana_state ana_state;
3950 
3951 	/* pre-set response details for this command */
3952 	response->status.sc = SPDK_NVME_SC_SUCCESS;
3953 	nsid = cmd->nsid;
3954 
3955 	if (spdk_unlikely(ctrlr == NULL)) {
3956 		SPDK_ERRLOG("I/O command sent before CONNECT\n");
3957 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3958 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3959 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3960 	}
3961 
3962 	if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
3963 		SPDK_ERRLOG("I/O command sent to disabled controller\n");
3964 		response->status.sct = SPDK_NVME_SCT_GENERIC;
3965 		response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
3966 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3967 	}
3968 
3969 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
3970 	if (ns == NULL || ns->bdev == NULL) {
3971 		SPDK_DEBUGLOG(nvmf, "Unsuccessful query for nsid %u\n", cmd->nsid);
3972 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
3973 		response->status.dnr = 1;
3974 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3975 	}
3976 
3977 	ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
3978 	if (spdk_unlikely(ana_state != SPDK_NVME_ANA_OPTIMIZED_STATE &&
3979 			  ana_state != SPDK_NVME_ANA_NON_OPTIMIZED_STATE)) {
3980 		SPDK_DEBUGLOG(nvmf, "Fail I/O command due to ANA state %d\n",
3981 			      ana_state);
3982 		response->status.sct = SPDK_NVME_SCT_PATH;
3983 		response->status.sc = _nvme_ana_state_to_path_status(ana_state);
3984 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
3985 	}
3986 
3987 	if (spdk_likely(ctrlr->listener != NULL)) {
3988 		SPDK_DTRACE_PROBE3(nvmf_request_io_exec_path, req,
3989 				   ctrlr->listener->trid->traddr,
3990 				   ctrlr->listener->trid->trsvcid);
3991 	}
3992 
3993 	/* scan-build falsely reporting dereference of null pointer */
3994 	assert(group != NULL && group->sgroups != NULL);
3995 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
3996 	if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
3997 		SPDK_DEBUGLOG(nvmf, "Reservation Conflict for nsid %u, opcode %u\n",
3998 			      cmd->nsid, cmd->opc);
3999 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4000 	}
4001 
4002 	bdev = ns->bdev;
4003 	desc = ns->desc;
4004 	ch = ns_info->channel;
4005 
4006 	if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) {
4007 		return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch);
4008 	} else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) {
4009 		struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl;
4010 
4011 		SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n");
4012 
4013 		/* abort req->qpair->first_fused_request and continue with new command */
4014 		fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
4015 		fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
4016 		_nvmf_request_complete(req->qpair->first_fused_req);
4017 		req->qpair->first_fused_req = NULL;
4018 	}
4019 
4020 	if (spdk_nvmf_request_using_zcopy(req)) {
4021 		assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
4022 		return nvmf_bdev_ctrlr_zcopy_start(bdev, desc, ch, req);
4023 	} else {
4024 		switch (cmd->opc) {
4025 		case SPDK_NVME_OPC_READ:
4026 			return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
4027 		case SPDK_NVME_OPC_WRITE:
4028 			return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
4029 		case SPDK_NVME_OPC_COMPARE:
4030 			return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req);
4031 		case SPDK_NVME_OPC_WRITE_ZEROES:
4032 			return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
4033 		case SPDK_NVME_OPC_FLUSH:
4034 			return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
4035 		case SPDK_NVME_OPC_DATASET_MANAGEMENT:
4036 			return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
4037 		case SPDK_NVME_OPC_RESERVATION_REGISTER:
4038 		case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
4039 		case SPDK_NVME_OPC_RESERVATION_RELEASE:
4040 		case SPDK_NVME_OPC_RESERVATION_REPORT:
4041 			spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req);
4042 			return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
4043 		default:
4044 			return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
4045 		}
4046 	}
4047 }
4048 
4049 static void
4050 nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair)
4051 {
4052 	if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) {
4053 		assert(qpair->state_cb != NULL);
4054 
4055 		if (TAILQ_EMPTY(&qpair->outstanding)) {
4056 			qpair->state_cb(qpair->state_cb_arg, 0);
4057 		}
4058 	}
4059 }
4060 
4061 int
4062 spdk_nvmf_request_free(struct spdk_nvmf_request *req)
4063 {
4064 	struct spdk_nvmf_qpair *qpair = req->qpair;
4065 
4066 	TAILQ_REMOVE(&qpair->outstanding, req, link);
4067 	if (nvmf_transport_req_free(req)) {
4068 		SPDK_ERRLOG("Unable to free transport level request resources.\n");
4069 	}
4070 
4071 	nvmf_qpair_request_cleanup(qpair);
4072 
4073 	return 0;
4074 }
4075 
4076 static void
4077 _nvmf_request_complete(void *ctx)
4078 {
4079 	struct spdk_nvmf_request *req = ctx;
4080 	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
4081 	struct spdk_nvmf_qpair *qpair;
4082 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4083 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4084 	bool is_aer = false;
4085 	uint32_t nsid;
4086 	bool paused;
4087 	uint8_t opcode;
4088 
4089 	rsp->sqid = 0;
4090 	rsp->status.p = 0;
4091 	rsp->cid = req->cmd->nvme_cmd.cid;
4092 	nsid = req->cmd->nvme_cmd.nsid;
4093 	opcode = req->cmd->nvmf_cmd.opcode;
4094 
4095 	qpair = req->qpair;
4096 	if (qpair->ctrlr) {
4097 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4098 		assert(sgroup != NULL);
4099 		is_aer = req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
4100 
4101 		/*
4102 		 * Set the crd value.
4103 		 * If the the IO has any error, and dnr (DoNotRetry) is not 1,
4104 		 * and ACRE is enabled, we will set the crd to 1 to select the first CRDT.
4105 		 */
4106 		if (spdk_nvme_cpl_is_error(rsp) &&
4107 		    rsp->status.dnr == 0 &&
4108 		    qpair->ctrlr->acre_enabled) {
4109 			rsp->status.crd = 1;
4110 		}
4111 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4112 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4113 	}
4114 
4115 	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
4116 		spdk_nvme_print_completion(qpair->qid, rsp);
4117 	}
4118 
4119 	switch (req->zcopy_phase) {
4120 	case NVMF_ZCOPY_PHASE_NONE:
4121 		TAILQ_REMOVE(&qpair->outstanding, req, link);
4122 		break;
4123 	case NVMF_ZCOPY_PHASE_INIT:
4124 		if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
4125 			req->zcopy_phase = NVMF_ZCOPY_PHASE_INIT_FAILED;
4126 			TAILQ_REMOVE(&qpair->outstanding, req, link);
4127 		} else {
4128 			req->zcopy_phase = NVMF_ZCOPY_PHASE_EXECUTE;
4129 		}
4130 		break;
4131 	case NVMF_ZCOPY_PHASE_EXECUTE:
4132 		break;
4133 	case NVMF_ZCOPY_PHASE_END_PENDING:
4134 		TAILQ_REMOVE(&qpair->outstanding, req, link);
4135 		req->zcopy_phase = NVMF_ZCOPY_PHASE_COMPLETE;
4136 		break;
4137 	default:
4138 		SPDK_ERRLOG("Invalid ZCOPY phase %u\n", req->zcopy_phase);
4139 		break;
4140 	}
4141 
4142 	if (nvmf_transport_req_complete(req)) {
4143 		SPDK_ERRLOG("Transport request completion error!\n");
4144 	}
4145 
4146 	/* AER cmd is an exception */
4147 	if (sgroup && !is_aer) {
4148 		if (spdk_unlikely(opcode == SPDK_NVME_OPC_FABRIC ||
4149 				  nvmf_qpair_is_admin_queue(qpair))) {
4150 			assert(sgroup->mgmt_io_outstanding > 0);
4151 			sgroup->mgmt_io_outstanding--;
4152 		} else {
4153 			if (req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE ||
4154 			    req->zcopy_phase == NVMF_ZCOPY_PHASE_COMPLETE ||
4155 			    req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT_FAILED) {
4156 				/* End of request */
4157 
4158 				/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
4159 				if (spdk_likely(nsid - 1 < sgroup->num_ns)) {
4160 					sgroup->ns_info[nsid - 1].io_outstanding--;
4161 				}
4162 			}
4163 		}
4164 
4165 		if (spdk_unlikely(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
4166 				  sgroup->mgmt_io_outstanding == 0)) {
4167 			paused = true;
4168 			for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
4169 				ns_info = &sgroup->ns_info[nsid];
4170 
4171 				if (ns_info->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
4172 				    ns_info->io_outstanding > 0) {
4173 					paused = false;
4174 					break;
4175 				}
4176 			}
4177 
4178 			if (paused) {
4179 				sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
4180 				sgroup->cb_fn(sgroup->cb_arg, 0);
4181 				sgroup->cb_fn = NULL;
4182 				sgroup->cb_arg = NULL;
4183 			}
4184 		}
4185 
4186 	}
4187 
4188 	nvmf_qpair_request_cleanup(qpair);
4189 }
4190 
4191 int
4192 spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
4193 {
4194 	struct spdk_nvmf_qpair *qpair = req->qpair;
4195 
4196 	spdk_thread_exec_msg(qpair->group->thread, _nvmf_request_complete, req);
4197 
4198 	return 0;
4199 }
4200 
4201 void
4202 spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
4203 {
4204 	struct spdk_nvmf_qpair *qpair = req->qpair;
4205 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4206 	enum spdk_nvmf_request_exec_status status;
4207 
4208 	if (qpair->ctrlr) {
4209 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4210 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4211 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4212 	}
4213 
4214 	assert(sgroup != NULL);
4215 	sgroup->mgmt_io_outstanding++;
4216 
4217 	/* Place the request on the outstanding list so we can keep track of it */
4218 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4219 
4220 	assert(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC);
4221 	status = nvmf_ctrlr_process_fabrics_cmd(req);
4222 
4223 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4224 		_nvmf_request_complete(req);
4225 	}
4226 }
4227 
4228 static bool nvmf_check_subsystem_active(struct spdk_nvmf_request *req)
4229 {
4230 	struct spdk_nvmf_qpair *qpair = req->qpair;
4231 	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
4232 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4233 	uint32_t nsid;
4234 
4235 	if (qpair->ctrlr) {
4236 		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
4237 		assert(sgroup != NULL);
4238 	} else if (spdk_unlikely(nvmf_request_is_fabric_connect(req))) {
4239 		sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
4240 	}
4241 
4242 	/* Check if the subsystem is paused (if there is a subsystem) */
4243 	if (sgroup != NULL) {
4244 		if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
4245 				  nvmf_qpair_is_admin_queue(qpair))) {
4246 			if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4247 				/* The subsystem is not currently active. Queue this request. */
4248 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4249 				return false;
4250 			}
4251 			sgroup->mgmt_io_outstanding++;
4252 		} else {
4253 			nsid = req->cmd->nvme_cmd.nsid;
4254 
4255 			/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
4256 			if (spdk_unlikely(nsid - 1 >= sgroup->num_ns)) {
4257 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4258 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4259 				req->rsp->nvme_cpl.status.dnr = 1;
4260 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4261 				_nvmf_request_complete(req);
4262 				return false;
4263 			}
4264 
4265 			ns_info = &sgroup->ns_info[nsid - 1];
4266 			if (ns_info->channel == NULL) {
4267 				/* This can can happen if host sends I/O to a namespace that is
4268 				 * in the process of being added, but before the full addition
4269 				 * process is complete.  Report invalid namespace in that case.
4270 				 */
4271 				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4272 				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4273 				req->rsp->nvme_cpl.status.dnr = 1;
4274 				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4275 				ns_info->io_outstanding++;
4276 				_nvmf_request_complete(req);
4277 				return false;
4278 			}
4279 
4280 			if (ns_info->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
4281 				/* The namespace is not currently active. Queue this request. */
4282 				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
4283 				return false;
4284 			}
4285 
4286 			ns_info->io_outstanding++;
4287 		}
4288 
4289 		if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
4290 			req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
4291 			req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
4292 			TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4293 			_nvmf_request_complete(req);
4294 			return false;
4295 		}
4296 	}
4297 
4298 	return true;
4299 }
4300 
4301 void
4302 spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
4303 {
4304 	struct spdk_nvmf_qpair *qpair = req->qpair;
4305 	struct spdk_nvmf_transport *transport = qpair->transport;
4306 	enum spdk_nvmf_request_exec_status status;
4307 
4308 	if (!nvmf_check_subsystem_active(req)) {
4309 		return;
4310 	}
4311 
4312 	if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf")) {
4313 		spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
4314 	}
4315 
4316 	/* Place the request on the outstanding list so we can keep track of it */
4317 	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
4318 
4319 	if (spdk_unlikely((req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC) &&
4320 			  spdk_nvme_trtype_is_fabrics(transport->ops->type))) {
4321 		status = nvmf_ctrlr_process_fabrics_cmd(req);
4322 	} else if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4323 		status = nvmf_ctrlr_process_admin_cmd(req);
4324 	} else {
4325 		status = nvmf_ctrlr_process_io_cmd(req);
4326 	}
4327 
4328 	if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
4329 		_nvmf_request_complete(req);
4330 	}
4331 }
4332 
4333 static bool
4334 nvmf_ctrlr_get_dif_ctx(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
4335 		       struct spdk_dif_ctx *dif_ctx)
4336 {
4337 	struct spdk_nvmf_ns *ns;
4338 	struct spdk_bdev *bdev;
4339 
4340 	if (ctrlr == NULL || cmd == NULL) {
4341 		return false;
4342 	}
4343 
4344 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, cmd->nsid);
4345 	if (ns == NULL || ns->bdev == NULL) {
4346 		return false;
4347 	}
4348 
4349 	bdev = ns->bdev;
4350 
4351 	switch (cmd->opc) {
4352 	case SPDK_NVME_OPC_READ:
4353 	case SPDK_NVME_OPC_WRITE:
4354 	case SPDK_NVME_OPC_COMPARE:
4355 		return nvmf_bdev_ctrlr_get_dif_ctx(bdev, cmd, dif_ctx);
4356 	default:
4357 		break;
4358 	}
4359 
4360 	return false;
4361 }
4362 
4363 bool
4364 spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req, struct spdk_dif_ctx *dif_ctx)
4365 {
4366 	struct spdk_nvmf_qpair *qpair = req->qpair;
4367 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
4368 
4369 	if (spdk_likely(ctrlr == NULL || !ctrlr->dif_insert_or_strip)) {
4370 		return false;
4371 	}
4372 
4373 	if (spdk_unlikely(qpair->state != SPDK_NVMF_QPAIR_ACTIVE)) {
4374 		return false;
4375 	}
4376 
4377 	if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) {
4378 		return false;
4379 	}
4380 
4381 	if (spdk_unlikely(nvmf_qpair_is_admin_queue(qpair))) {
4382 		return false;
4383 	}
4384 
4385 	return nvmf_ctrlr_get_dif_ctx(ctrlr, &req->cmd->nvme_cmd, dif_ctx);
4386 }
4387 
4388 void
4389 spdk_nvmf_set_custom_admin_cmd_hdlr(uint8_t opc, spdk_nvmf_custom_cmd_hdlr hdlr)
4390 {
4391 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = hdlr;
4392 }
4393 
4394 static int
4395 nvmf_passthru_admin_cmd(struct spdk_nvmf_request *req)
4396 {
4397 	struct spdk_bdev *bdev;
4398 	struct spdk_bdev_desc *desc;
4399 	struct spdk_io_channel *ch;
4400 	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
4401 	struct spdk_nvme_cpl *response = spdk_nvmf_request_get_response(req);
4402 	uint32_t bdev_nsid;
4403 	int rc;
4404 
4405 	if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid == 0) {
4406 		bdev_nsid = cmd->nsid;
4407 	} else {
4408 		bdev_nsid = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].nsid;
4409 	}
4410 
4411 	rc = spdk_nvmf_request_get_bdev(bdev_nsid, req, &bdev, &desc, &ch);
4412 	if (rc) {
4413 		response->status.sct = SPDK_NVME_SCT_GENERIC;
4414 		response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
4415 		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
4416 	}
4417 	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, NULL);
4418 }
4419 
4420 void
4421 spdk_nvmf_set_passthru_admin_cmd(uint8_t opc, uint32_t forward_nsid)
4422 {
4423 	g_nvmf_custom_admin_cmd_hdlrs[opc].hdlr = nvmf_passthru_admin_cmd;
4424 	g_nvmf_custom_admin_cmd_hdlrs[opc].nsid = forward_nsid;
4425 }
4426 
4427 int
4428 spdk_nvmf_request_get_bdev(uint32_t nsid, struct spdk_nvmf_request *req,
4429 			   struct spdk_bdev **bdev, struct spdk_bdev_desc **desc, struct spdk_io_channel **ch)
4430 {
4431 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
4432 	struct spdk_nvmf_ns *ns;
4433 	struct spdk_nvmf_poll_group *group = req->qpair->group;
4434 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
4435 
4436 	*bdev = NULL;
4437 	*desc = NULL;
4438 	*ch = NULL;
4439 
4440 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
4441 	if (ns == NULL || ns->bdev == NULL) {
4442 		return -EINVAL;
4443 	}
4444 
4445 	assert(group != NULL && group->sgroups != NULL);
4446 	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
4447 	*bdev = ns->bdev;
4448 	*desc = ns->desc;
4449 	*ch = ns_info->channel;
4450 
4451 	return 0;
4452 }
4453 
4454 struct spdk_nvmf_ctrlr *spdk_nvmf_request_get_ctrlr(struct spdk_nvmf_request *req)
4455 {
4456 	return req->qpair->ctrlr;
4457 }
4458 
4459 struct spdk_nvme_cmd *spdk_nvmf_request_get_cmd(struct spdk_nvmf_request *req)
4460 {
4461 	return &req->cmd->nvme_cmd;
4462 }
4463 
4464 struct spdk_nvme_cpl *spdk_nvmf_request_get_response(struct spdk_nvmf_request *req)
4465 {
4466 	return &req->rsp->nvme_cpl;
4467 }
4468 
4469 struct spdk_nvmf_subsystem *spdk_nvmf_request_get_subsystem(struct spdk_nvmf_request *req)
4470 {
4471 	return req->qpair->ctrlr->subsys;
4472 }
4473 
4474 void spdk_nvmf_request_get_data(struct spdk_nvmf_request *req, void **data, uint32_t *length)
4475 {
4476 	*data = req->data;
4477 	*length = req->length;
4478 }
4479 
4480 struct spdk_nvmf_subsystem *spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr)
4481 {
4482 	return ctrlr->subsys;
4483 }
4484 
4485 uint16_t spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr)
4486 {
4487 	return ctrlr->cntlid;
4488 }
4489 
4490 struct spdk_nvmf_request *spdk_nvmf_request_get_req_to_abort(struct spdk_nvmf_request *req)
4491 {
4492 	return req->req_to_abort;
4493 }
4494