xref: /spdk/lib/nvme/nvme_qpair.c (revision d39963a9da590629d3a9bc6e3011e433229555c7)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "nvme_internal.h"
36 #include "spdk/nvme_ocssd.h"
37 #include "spdk/string.h"
38 
39 #define NVME_CMD_DPTR_STR_SIZE 256
40 
41 static int nvme_qpair_resubmit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);
42 
/* One row of a code-to-name lookup table scanned by nvme_get_string(). */
struct nvme_string {
	/* Numeric code to match; 0xFFFF terminates a table. */
	uint16_t	value;
	/* Text returned for this code. */
	const char	*str;
};
47 
48 static const struct nvme_string admin_opcode[] = {
49 	{ SPDK_NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
50 	{ SPDK_NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
51 	{ SPDK_NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
52 	{ SPDK_NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
53 	{ SPDK_NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
54 	{ SPDK_NVME_OPC_IDENTIFY, "IDENTIFY" },
55 	{ SPDK_NVME_OPC_ABORT, "ABORT" },
56 	{ SPDK_NVME_OPC_SET_FEATURES, "SET FEATURES" },
57 	{ SPDK_NVME_OPC_GET_FEATURES, "GET FEATURES" },
58 	{ SPDK_NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
59 	{ SPDK_NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" },
60 	{ SPDK_NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" },
61 	{ SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
62 	{ SPDK_NVME_OPC_DEVICE_SELF_TEST, "DEVICE SELF-TEST" },
63 	{ SPDK_NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" },
64 	{ SPDK_NVME_OPC_KEEP_ALIVE, "KEEP ALIVE" },
65 	{ SPDK_NVME_OPC_DIRECTIVE_SEND, "DIRECTIVE SEND" },
66 	{ SPDK_NVME_OPC_DIRECTIVE_RECEIVE, "DIRECTIVE RECEIVE" },
67 	{ SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT, "VIRTUALIZATION MANAGEMENT" },
68 	{ SPDK_NVME_OPC_NVME_MI_SEND, "NVME-MI SEND" },
69 	{ SPDK_NVME_OPC_NVME_MI_RECEIVE, "NVME-MI RECEIVE" },
70 	{ SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG, "DOORBELL BUFFER CONFIG" },
71 	{ SPDK_NVME_OPC_FABRIC, "FABRIC" },
72 	{ SPDK_NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
73 	{ SPDK_NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
74 	{ SPDK_NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
75 	{ SPDK_NVME_OPC_SANITIZE, "SANITIZE" },
76 	{ SPDK_NVME_OPC_GET_LBA_STATUS, "GET LBA STATUS" },
77 	{ SPDK_OCSSD_OPC_GEOMETRY, "OCSSD / GEOMETRY" },
78 	{ 0xFFFF, "ADMIN COMMAND" }
79 };
80 
81 static const struct nvme_string fabric_opcode[] = {
82 	{ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET, "PROPERTY SET" },
83 	{ SPDK_NVMF_FABRIC_COMMAND_CONNECT, "CONNECT" },
84 	{ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET, "PROPERTY GET" },
85 	{ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND, "AUTHENTICATION SEND" },
86 	{ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV, "AUTHENTICATION RECV" },
87 	{ 0xFFFF, "RESERVED / VENDOR SPECIFIC" }
88 };
89 
90 static const struct nvme_string feat_opcode[] = {
91 	{ SPDK_NVME_FEAT_ARBITRATION, "ARBITRATION" },
92 	{ SPDK_NVME_FEAT_POWER_MANAGEMENT, "POWER MANAGEMENT" },
93 	{ SPDK_NVME_FEAT_LBA_RANGE_TYPE, "LBA RANGE TYPE" },
94 	{ SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD, "TEMPERATURE THRESHOLD" },
95 	{ SPDK_NVME_FEAT_ERROR_RECOVERY, "ERROR_RECOVERY" },
96 	{ SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE, "VOLATILE WRITE CACHE" },
97 	{ SPDK_NVME_FEAT_NUMBER_OF_QUEUES, "NUMBER OF QUEUES" },
98 	{ SPDK_NVME_FEAT_INTERRUPT_COALESCING, "INTERRUPT COALESCING" },
99 	{ SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION, "INTERRUPT VECTOR CONFIGURATION" },
100 	{ SPDK_NVME_FEAT_WRITE_ATOMICITY, "WRITE ATOMICITY" },
101 	{ SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION, "ASYNC EVENT CONFIGURATION" },
102 	{ SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION, "AUTONOMOUS POWER STATE TRANSITION" },
103 	{ SPDK_NVME_FEAT_HOST_MEM_BUFFER, "HOST MEM BUFFER" },
104 	{ SPDK_NVME_FEAT_TIMESTAMP, "TIMESTAMP" },
105 	{ SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, "KEEP ALIVE TIMER" },
106 	{ SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT, "HOST CONTROLLED THERMAL MANAGEMENT" },
107 	{ SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG, "NON OPERATIONAL POWER STATE CONFIG" },
108 	{ SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER, "SOFTWARE PROGRESS MARKER" },
109 	{ SPDK_NVME_FEAT_HOST_IDENTIFIER, "HOST IDENTIFIER" },
110 	{ SPDK_NVME_FEAT_HOST_RESERVE_MASK, "HOST RESERVE MASK" },
111 	{ SPDK_NVME_FEAT_HOST_RESERVE_PERSIST, "HOST RESERVE PERSIST" },
112 	{ 0xFFFF, "RESERVED" }
113 };
114 
115 static const struct nvme_string io_opcode[] = {
116 	{ SPDK_NVME_OPC_FLUSH, "FLUSH" },
117 	{ SPDK_NVME_OPC_WRITE, "WRITE" },
118 	{ SPDK_NVME_OPC_READ, "READ" },
119 	{ SPDK_NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
120 	{ SPDK_NVME_OPC_COMPARE, "COMPARE" },
121 	{ SPDK_NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
122 	{ SPDK_NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
123 	{ SPDK_NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
124 	{ SPDK_NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
125 	{ SPDK_NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
126 	{ SPDK_NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
127 	{ SPDK_OCSSD_OPC_VECTOR_RESET, "OCSSD / VECTOR RESET" },
128 	{ SPDK_OCSSD_OPC_VECTOR_WRITE, "OCSSD / VECTOR WRITE" },
129 	{ SPDK_OCSSD_OPC_VECTOR_READ, "OCSSD / VECTOR READ" },
130 	{ SPDK_OCSSD_OPC_VECTOR_COPY, "OCSSD / VECTOR COPY" },
131 	{ 0xFFFF, "IO COMMAND" }
132 };
133 
134 static const struct nvme_string sgl_type[] = {
135 	{ SPDK_NVME_SGL_TYPE_DATA_BLOCK, "DATA BLOCK" },
136 	{ SPDK_NVME_SGL_TYPE_BIT_BUCKET, "BIT BUCKET" },
137 	{ SPDK_NVME_SGL_TYPE_SEGMENT, "SEGMENT" },
138 	{ SPDK_NVME_SGL_TYPE_LAST_SEGMENT, "LAST SEGMENT" },
139 	{ SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK, "KEYED DATA BLOCK" },
140 	{ SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK, "TRANSPORT DATA BLOCK" },
141 	{ SPDK_NVME_SGL_TYPE_VENDOR_SPECIFIC, "VENDOR SPECIFIC" },
142 	{ 0xFFFF, "RESERVED" }
143 };
144 
145 static const struct nvme_string sgl_subtype[] = {
146 	{ SPDK_NVME_SGL_SUBTYPE_ADDRESS, "ADDRESS" },
147 	{ SPDK_NVME_SGL_SUBTYPE_OFFSET, "OFFSET" },
148 	{ SPDK_NVME_SGL_SUBTYPE_TRANSPORT, "TRANSPORT" },
149 	{ SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY, "INVALIDATE KEY" },
150 	{ 0xFFFF, "RESERVED" }
151 };
152 
153 static const char *
154 nvme_get_string(const struct nvme_string *strings, uint16_t value)
155 {
156 	const struct nvme_string *entry;
157 
158 	entry = strings;
159 
160 	while (entry->value != 0xFFFF) {
161 		if (entry->value == value) {
162 			return entry->str;
163 		}
164 		entry++;
165 	}
166 	return entry->str;
167 }
168 
169 static void
170 nvme_get_sgl_unkeyed(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
171 {
172 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
173 
174 	snprintf(buf, size, " len:0x%x", sgl->unkeyed.length);
175 }
176 
177 static void
178 nvme_get_sgl_keyed(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
179 {
180 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
181 
182 	snprintf(buf, size, " len:0x%x key:0x%x", sgl->keyed.length, sgl->keyed.key);
183 }
184 
185 static void
186 nvme_get_sgl(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
187 {
188 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
189 	int c;
190 
191 	c = snprintf(buf, size, "SGL %s %s 0x%" PRIx64, nvme_get_string(sgl_type, sgl->generic.type),
192 		     nvme_get_string(sgl_subtype, sgl->generic.subtype), sgl->address);
193 	assert(c >= 0 && (size_t)c < size);
194 
195 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
196 		nvme_get_sgl_unkeyed(buf + c, size - c, cmd);
197 	}
198 
199 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
200 		nvme_get_sgl_keyed(buf + c, size - c, cmd);
201 	}
202 }
203 
204 static void
205 nvme_get_prp(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
206 {
207 	snprintf(buf, size, "PRP1 0x%" PRIx64 " PRP2 0x%" PRIx64, cmd->dptr.prp.prp1, cmd->dptr.prp.prp2);
208 }
209 
210 static void
211 nvme_get_dptr(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
212 {
213 	if (spdk_nvme_opc_get_data_transfer(cmd->opc) != SPDK_NVME_DATA_NONE) {
214 		switch (cmd->psdt) {
215 		case SPDK_NVME_PSDT_PRP:
216 			nvme_get_prp(buf, size, cmd);
217 			break;
218 		case SPDK_NVME_PSDT_SGL_MPTR_CONTIG:
219 		case SPDK_NVME_PSDT_SGL_MPTR_SGL:
220 			nvme_get_sgl(buf, size, cmd);
221 			break;
222 		default:
223 			;
224 		}
225 	}
226 }
227 
228 static void
229 nvme_admin_qpair_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
230 {
231 	struct spdk_nvmf_capsule_cmd *fcmd = (void *)cmd;
232 	char dptr[NVME_CMD_DPTR_STR_SIZE] = {'\0'};
233 
234 	assert(cmd != NULL);
235 
236 	nvme_get_dptr(dptr, sizeof(dptr), cmd);
237 
238 	switch ((int)cmd->opc) {
239 	case SPDK_NVME_OPC_SET_FEATURES:
240 	case SPDK_NVME_OPC_GET_FEATURES:
241 		SPDK_NOTICELOG("%s %s cid:%d cdw10:%08x %s\n",
242 			       nvme_get_string(admin_opcode, cmd->opc), nvme_get_string(feat_opcode,
243 					       cmd->cdw10_bits.set_features.fid), cmd->cid, cmd->cdw10, dptr);
244 		break;
245 	case SPDK_NVME_OPC_FABRIC:
246 		SPDK_NOTICELOG("%s %s qid:%d cid:%d %s\n",
247 			       nvme_get_string(admin_opcode, cmd->opc), nvme_get_string(fabric_opcode, fcmd->fctype), qid,
248 			       fcmd->cid, dptr);
249 		break;
250 	default:
251 		SPDK_NOTICELOG("%s (%02x) qid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x %s\n",
252 			       nvme_get_string(admin_opcode, cmd->opc), cmd->opc, qid, cmd->cid, cmd->nsid, cmd->cdw10,
253 			       cmd->cdw11, dptr);
254 	}
255 }
256 
257 static void
258 nvme_io_qpair_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
259 {
260 	char dptr[NVME_CMD_DPTR_STR_SIZE] = {'\0'};
261 
262 	assert(cmd != NULL);
263 
264 	nvme_get_dptr(dptr, sizeof(dptr), cmd);
265 
266 	switch ((int)cmd->opc) {
267 	case SPDK_NVME_OPC_WRITE:
268 	case SPDK_NVME_OPC_READ:
269 	case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
270 	case SPDK_NVME_OPC_COMPARE:
271 		SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d "
272 			       "lba:%llu len:%d %s\n",
273 			       nvme_get_string(io_opcode, cmd->opc), qid, cmd->cid, cmd->nsid,
274 			       ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
275 			       (cmd->cdw12 & 0xFFFF) + 1, dptr);
276 		break;
277 	case SPDK_NVME_OPC_FLUSH:
278 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
279 		SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d\n",
280 			       nvme_get_string(io_opcode, cmd->opc), qid, cmd->cid, cmd->nsid);
281 		break;
282 	default:
283 		SPDK_NOTICELOG("%s (%02x) sqid:%d cid:%d nsid:%d\n",
284 			       nvme_get_string(io_opcode, cmd->opc), cmd->opc, qid, cmd->cid, cmd->nsid);
285 		break;
286 	}
287 }
288 
289 void
290 spdk_nvme_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
291 {
292 	assert(cmd != NULL);
293 
294 	if (qid == 0 || cmd->opc == SPDK_NVME_OPC_FABRIC) {
295 		nvme_admin_qpair_print_command(qid, cmd);
296 	} else {
297 		nvme_io_qpair_print_command(qid, cmd);
298 	}
299 }
300 
301 void
302 spdk_nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd)
303 {
304 	assert(qpair != NULL);
305 	assert(cmd != NULL);
306 
307 	spdk_nvme_print_command(qpair->id, cmd);
308 }
309 
310 static const struct nvme_string generic_status[] = {
311 	{ SPDK_NVME_SC_SUCCESS, "SUCCESS" },
312 	{ SPDK_NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
313 	{ SPDK_NVME_SC_INVALID_FIELD, "INVALID FIELD" },
314 	{ SPDK_NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
315 	{ SPDK_NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
316 	{ SPDK_NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
317 	{ SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
318 	{ SPDK_NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
319 	{ SPDK_NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
320 	{ SPDK_NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
321 	{ SPDK_NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
322 	{ SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
323 	{ SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
324 	{ SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" },
325 	{ SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" },
326 	{ SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
327 	{ SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
328 	{ SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
329 	{ SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" },
330 	{ SPDK_NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" },
331 	{ SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
332 	{ SPDK_NVME_SC_OPERATION_DENIED, "OPERATION DENIED" },
333 	{ SPDK_NVME_SC_INVALID_SGL_OFFSET, "INVALID SGL OFFSET" },
334 	{ SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT, "HOSTID INCONSISTENT FORMAT" },
335 	{ SPDK_NVME_SC_KEEP_ALIVE_EXPIRED, "KEEP ALIVE EXPIRED" },
336 	{ SPDK_NVME_SC_KEEP_ALIVE_INVALID, "KEEP ALIVE INVALID" },
337 	{ SPDK_NVME_SC_ABORTED_PREEMPT, "ABORTED - PREEMPT AND ABORT" },
338 	{ SPDK_NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" },
339 	{ SPDK_NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" },
340 	{ SPDK_NVME_SC_SGL_DATA_BLOCK_GRANULARITY_INVALID, "DATA BLOCK GRANULARITY INVALID" },
341 	{ SPDK_NVME_SC_COMMAND_INVALID_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" },
342 	{ SPDK_NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
343 	{ SPDK_NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
344 	{ SPDK_NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
345 	{ SPDK_NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
346 	{ SPDK_NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
347 	{ 0xFFFF, "GENERIC" }
348 };
349 
350 static const struct nvme_string command_specific_status[] = {
351 	{ SPDK_NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
352 	{ SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
353 	{ SPDK_NVME_SC_INVALID_QUEUE_SIZE, "INVALID QUEUE SIZE" },
354 	{ SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
355 	{ SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
356 	{ SPDK_NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
357 	{ SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
358 	{ SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
359 	{ SPDK_NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
360 	{ SPDK_NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
361 	{ SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" },
362 	{ SPDK_NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
363 	{ SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" },
364 	{ SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
365 	{ SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
366 	{ SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" },
367 	{ SPDK_NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" },
368 	{ SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" },
369 	{ SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
370 	{ SPDK_NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
371 	{ SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
372 	{ SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" },
373 	{ SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
374 	{ SPDK_NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
375 	{ SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" },
376 	{ SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" },
377 	{ SPDK_NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" },
378 	{ SPDK_NVME_SC_DEVICE_SELF_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" },
379 	{ SPDK_NVME_SC_BOOT_PARTITION_WRITE_PROHIBITED, "BOOT PARTITION WRITE PROHIBITED" },
380 	{ SPDK_NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER ID" },
381 	{ SPDK_NVME_SC_INVALID_SECONDARY_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" },
382 	{ SPDK_NVME_SC_INVALID_NUM_CTRLR_RESOURCES, "INVALID NUMBER OF CONTROLLER RESOURCES" },
383 	{ SPDK_NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" },
384 	{ SPDK_NVME_SC_STREAM_RESOURCE_ALLOCATION_FAILED, "STREAM RESOURCE ALLOCATION FAILED"},
385 	{ SPDK_NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
386 	{ SPDK_NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
387 	{ SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_RANGE, "WRITE TO RO RANGE" },
388 	{ 0xFFFF, "COMMAND SPECIFIC" }
389 };
390 
391 static const struct nvme_string media_error_status[] = {
392 	{ SPDK_NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
393 	{ SPDK_NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
394 	{ SPDK_NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
395 	{ SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
396 	{ SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
397 	{ SPDK_NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
398 	{ SPDK_NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
399 	{ SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" },
400 	{ SPDK_OCSSD_SC_OFFLINE_CHUNK, "RESET OFFLINE CHUNK" },
401 	{ SPDK_OCSSD_SC_INVALID_RESET, "INVALID RESET" },
402 	{ SPDK_OCSSD_SC_WRITE_FAIL_WRITE_NEXT_UNIT, "WRITE FAIL WRITE NEXT UNIT" },
403 	{ SPDK_OCSSD_SC_WRITE_FAIL_CHUNK_EARLY_CLOSE, "WRITE FAIL CHUNK EARLY CLOSE" },
404 	{ SPDK_OCSSD_SC_OUT_OF_ORDER_WRITE, "OUT OF ORDER WRITE" },
405 	{ SPDK_OCSSD_SC_READ_HIGH_ECC, "READ HIGH ECC" },
406 	{ 0xFFFF, "MEDIA ERROR" }
407 };
408 
409 static const struct nvme_string path_status[] = {
410 	{ SPDK_NVME_SC_INTERNAL_PATH_ERROR, "INTERNAL PATH ERROR" },
411 	{ SPDK_NVME_SC_CONTROLLER_PATH_ERROR, "CONTROLLER PATH ERROR" },
412 	{ SPDK_NVME_SC_HOST_PATH_ERROR, "HOST PATH ERROR" },
413 	{ SPDK_NVME_SC_ABORTED_BY_HOST, "ABORTED BY HOST" },
414 	{ 0xFFFF, "PATH ERROR" }
415 };
416 
417 const char *
418 spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status)
419 {
420 	const struct nvme_string *entry;
421 
422 	switch (status->sct) {
423 	case SPDK_NVME_SCT_GENERIC:
424 		entry = generic_status;
425 		break;
426 	case SPDK_NVME_SCT_COMMAND_SPECIFIC:
427 		entry = command_specific_status;
428 		break;
429 	case SPDK_NVME_SCT_MEDIA_ERROR:
430 		entry = media_error_status;
431 		break;
432 	case SPDK_NVME_SCT_PATH:
433 		entry = path_status;
434 		break;
435 	case SPDK_NVME_SCT_VENDOR_SPECIFIC:
436 		return "VENDOR SPECIFIC";
437 	default:
438 		return "RESERVED";
439 	}
440 
441 	return nvme_get_string(entry, status->sc);
442 }
443 
444 void
445 spdk_nvme_print_completion(uint16_t qid, struct spdk_nvme_cpl *cpl)
446 {
447 	assert(cpl != NULL);
448 
449 	/* Check that sqid matches qid. Note that sqid is reserved
450 	 * for fabrics so don't print an error when sqid is 0. */
451 	if (cpl->sqid != qid && cpl->sqid != 0) {
452 		SPDK_ERRLOG("sqid %u doesn't match qid\n", cpl->sqid);
453 	}
454 
455 	SPDK_NOTICELOG("%s (%02x/%02x) qid:%d cid:%d cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n",
456 		       spdk_nvme_cpl_get_status_string(&cpl->status),
457 		       cpl->status.sct, cpl->status.sc, qid, cpl->cid, cpl->cdw0,
458 		       cpl->sqhd, cpl->status.p, cpl->status.m, cpl->status.dnr);
459 }
460 
461 void
462 spdk_nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl)
463 {
464 	spdk_nvme_print_completion(qpair->id, cpl);
465 }
466 
467 bool
468 nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl)
469 {
470 	/*
471 	 * TODO: spec is not clear how commands that are aborted due
472 	 *  to TLER will be marked.  So for now, it seems
473 	 *  NAMESPACE_NOT_READY is the only case where we should
474 	 *  look at the DNR bit.
475 	 */
476 	switch ((int)cpl->status.sct) {
477 	case SPDK_NVME_SCT_GENERIC:
478 		switch ((int)cpl->status.sc) {
479 		case SPDK_NVME_SC_NAMESPACE_NOT_READY:
480 		case SPDK_NVME_SC_FORMAT_IN_PROGRESS:
481 			if (cpl->status.dnr) {
482 				return false;
483 			} else {
484 				return true;
485 			}
486 		case SPDK_NVME_SC_INVALID_OPCODE:
487 		case SPDK_NVME_SC_INVALID_FIELD:
488 		case SPDK_NVME_SC_COMMAND_ID_CONFLICT:
489 		case SPDK_NVME_SC_DATA_TRANSFER_ERROR:
490 		case SPDK_NVME_SC_ABORTED_POWER_LOSS:
491 		case SPDK_NVME_SC_INTERNAL_DEVICE_ERROR:
492 		case SPDK_NVME_SC_ABORTED_BY_REQUEST:
493 		case SPDK_NVME_SC_ABORTED_SQ_DELETION:
494 		case SPDK_NVME_SC_ABORTED_FAILED_FUSED:
495 		case SPDK_NVME_SC_ABORTED_MISSING_FUSED:
496 		case SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
497 		case SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR:
498 		case SPDK_NVME_SC_LBA_OUT_OF_RANGE:
499 		case SPDK_NVME_SC_CAPACITY_EXCEEDED:
500 		default:
501 			return false;
502 		}
503 	case SPDK_NVME_SCT_PATH:
504 		/*
505 		 * Per NVMe TP 4028 (Path and Transport Error Enhancements), retries should be
506 		 * based on the setting of the DNR bit for Internal Path Error
507 		 */
508 		switch ((int)cpl->status.sc) {
509 		case SPDK_NVME_SC_INTERNAL_PATH_ERROR:
510 			return !cpl->status.dnr;
511 		default:
512 			return false;
513 		}
514 	case SPDK_NVME_SCT_COMMAND_SPECIFIC:
515 	case SPDK_NVME_SCT_MEDIA_ERROR:
516 	case SPDK_NVME_SCT_VENDOR_SPECIFIC:
517 	default:
518 		return false;
519 	}
520 }
521 
522 static void
523 nvme_qpair_manual_complete_request(struct spdk_nvme_qpair *qpair,
524 				   struct nvme_request *req, uint32_t sct, uint32_t sc,
525 				   uint32_t dnr, bool print_on_error)
526 {
527 	struct spdk_nvme_cpl	cpl;
528 	bool			error;
529 
530 	memset(&cpl, 0, sizeof(cpl));
531 	cpl.sqid = qpair->id;
532 	cpl.status.sct = sct;
533 	cpl.status.sc = sc;
534 	cpl.status.dnr = dnr;
535 
536 	error = spdk_nvme_cpl_is_error(&cpl);
537 
538 	if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
539 		SPDK_NOTICELOG("Command completed manually:\n");
540 		spdk_nvme_qpair_print_command(qpair, &req->cmd);
541 		spdk_nvme_qpair_print_completion(qpair, &cpl);
542 	}
543 
544 	nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &cpl);
545 	nvme_free_request(req);
546 }
547 
548 void
549 nvme_qpair_abort_queued_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
550 {
551 	struct nvme_request		*req;
552 	STAILQ_HEAD(, nvme_request)	tmp;
553 
554 	STAILQ_INIT(&tmp);
555 	STAILQ_SWAP(&tmp, &qpair->queued_req, nvme_request);
556 
557 	while (!STAILQ_EMPTY(&tmp)) {
558 		req = STAILQ_FIRST(&tmp);
559 		STAILQ_REMOVE_HEAD(&tmp, stailq);
560 		if (!qpair->ctrlr->opts.disable_error_logging) {
561 			SPDK_ERRLOG("aborting queued i/o\n");
562 		}
563 		nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
564 						   SPDK_NVME_SC_ABORTED_SQ_DELETION, dnr, true);
565 	}
566 }
567 
568 /* The callback to a request may submit the next request which is queued and
569  * then the same callback may abort it immediately. This repetition may cause
570  * infinite recursive calls. Hence move aborting requests to another list here
571  * and abort them later at resubmission.
572  */
573 static void
574 _nvme_qpair_complete_abort_queued_reqs(struct spdk_nvme_qpair *qpair)
575 {
576 	struct nvme_request		*req;
577 	STAILQ_HEAD(, nvme_request)	tmp;
578 
579 	if (spdk_likely(STAILQ_EMPTY(&qpair->aborting_queued_req))) {
580 		return;
581 	}
582 
583 	STAILQ_INIT(&tmp);
584 	STAILQ_SWAP(&tmp, &qpair->aborting_queued_req, nvme_request);
585 
586 	while (!STAILQ_EMPTY(&tmp)) {
587 		req = STAILQ_FIRST(&tmp);
588 		STAILQ_REMOVE_HEAD(&tmp, stailq);
589 		nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
590 						   SPDK_NVME_SC_ABORTED_BY_REQUEST, 1, true);
591 	}
592 }
593 
594 uint32_t
595 nvme_qpair_abort_queued_reqs_with_cbarg(struct spdk_nvme_qpair *qpair, void *cmd_cb_arg)
596 {
597 	struct nvme_request	*req, *tmp;
598 	uint32_t		aborting = 0;
599 
600 	STAILQ_FOREACH_SAFE(req, &qpair->queued_req, stailq, tmp) {
601 		if ((req->cb_arg != cmd_cb_arg) &&
602 		    (req->parent == NULL || req->parent->cb_arg != cmd_cb_arg)) {
603 			continue;
604 		}
605 
606 		STAILQ_REMOVE(&qpair->queued_req, req, nvme_request, stailq);
607 		STAILQ_INSERT_TAIL(&qpair->aborting_queued_req, req, stailq);
608 		if (!qpair->ctrlr->opts.disable_error_logging) {
609 			SPDK_ERRLOG("aborting queued i/o\n");
610 		}
611 		aborting++;
612 	}
613 
614 	return aborting;
615 }
616 
617 static inline bool
618 nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair)
619 {
620 	struct nvme_request *req;
621 
622 	/*
623 	 * Either during initial connect or reset, the qpair should follow the given state machine.
624 	 * QPAIR_DISABLED->QPAIR_CONNECTING->QPAIR_CONNECTED->QPAIR_ENABLING->QPAIR_ENABLED. In the
625 	 * reset case, once the qpair is properly connected, we need to abort any outstanding requests
626 	 * from the old transport connection and encourage the application to retry them. We also need
627 	 * to submit any queued requests that built up while we were in the connected or enabling state.
628 	 */
629 	if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && !qpair->ctrlr->is_resetting) {
630 		nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING);
631 		/*
632 		 * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset
633 		 * but we have historically not disconnected pcie qpairs during reset so we have to abort requests
634 		 * here.
635 		 */
636 		if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE &&
637 		    !qpair->is_new_qpair) {
638 			nvme_qpair_abort_all_queued_reqs(qpair, 0);
639 			nvme_transport_qpair_abort_reqs(qpair, 0);
640 		}
641 
642 		nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLED);
643 		while (!STAILQ_EMPTY(&qpair->queued_req)) {
644 			req = STAILQ_FIRST(&qpair->queued_req);
645 			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
646 			if (nvme_qpair_resubmit_request(qpair, req)) {
647 				break;
648 			}
649 		}
650 	}
651 
652 	/*
653 	 * When doing a reset, we must disconnect the qpair on the proper core.
654 	 * Note, reset is the only case where we set the failure reason without
655 	 * setting the qpair state since reset is done at the generic layer on the
656 	 * controller thread and we can't disconnect I/O qpairs from the controller
657 	 * thread.
658 	 */
659 	if (qpair->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE &&
660 	    nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED) {
661 		/* Don't disconnect PCIe qpairs. They are a special case for reset. */
662 		if (qpair->ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
663 			nvme_ctrlr_disconnect_qpair(qpair);
664 		}
665 		return false;
666 	}
667 
668 	return nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED;
669 }
670 
671 void
672 nvme_qpair_resubmit_requests(struct spdk_nvme_qpair *qpair, uint32_t num_requests)
673 {
674 	uint32_t i;
675 	int resubmit_rc;
676 	struct nvme_request *req;
677 
678 	assert(num_requests > 0);
679 
680 	for (i = 0; i < num_requests; i++) {
681 		if (qpair->ctrlr->is_resetting) {
682 			break;
683 		}
684 		if ((req = STAILQ_FIRST(&qpair->queued_req)) == NULL) {
685 			break;
686 		}
687 		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
688 		resubmit_rc = nvme_qpair_resubmit_request(qpair, req);
689 		if (spdk_unlikely(resubmit_rc != 0)) {
690 			SPDK_DEBUGLOG(nvme, "Unable to resubmit as many requests as we completed.\n");
691 			break;
692 		}
693 	}
694 
695 	_nvme_qpair_complete_abort_queued_reqs(qpair);
696 }
697 
698 static void
699 nvme_complete_register_operations(struct spdk_nvme_qpair *qpair)
700 {
701 	struct nvme_register_completion *ctx;
702 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
703 	STAILQ_HEAD(, nvme_register_completion) operations;
704 
705 	STAILQ_INIT(&operations);
706 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
707 	STAILQ_SWAP(&ctrlr->register_operations, &operations, nvme_register_completion);
708 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
709 
710 	while (!STAILQ_EMPTY(&operations)) {
711 		ctx = STAILQ_FIRST(&operations);
712 		STAILQ_REMOVE_HEAD(&operations, stailq);
713 		if (ctx->cb_fn != NULL) {
714 			ctx->cb_fn(ctx->cb_ctx, ctx->value, &ctx->cpl);
715 		}
716 		free(ctx);
717 	}
718 }
719 
720 int32_t
721 spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
722 {
723 	int32_t ret;
724 	struct nvme_request *req, *tmp;
725 
726 	if (spdk_unlikely(qpair->ctrlr->is_failed &&
727 			  nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTING)) {
728 		if (qpair->ctrlr->is_removed) {
729 			nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
730 			nvme_qpair_abort_all_queued_reqs(qpair, 0);
731 			nvme_transport_qpair_abort_reqs(qpair, 0);
732 		}
733 		return -ENXIO;
734 	}
735 
736 	if (spdk_unlikely(!nvme_qpair_check_enabled(qpair) &&
737 			  !(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING ||
738 			    nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING))) {
739 		/*
740 		 * qpair is not enabled, likely because a controller reset is
741 		 *  in progress.
742 		 */
743 		return -ENXIO;
744 	}
745 
746 	/* error injection for those queued error requests */
747 	if (spdk_unlikely(!STAILQ_EMPTY(&qpair->err_req_head))) {
748 		STAILQ_FOREACH_SAFE(req, &qpair->err_req_head, stailq, tmp) {
749 			if (spdk_get_ticks() - req->submit_tick > req->timeout_tsc) {
750 				STAILQ_REMOVE(&qpair->err_req_head, req, nvme_request, stailq);
751 				nvme_qpair_manual_complete_request(qpair, req,
752 								   req->cpl.status.sct,
753 								   req->cpl.status.sc, 0, true);
754 			}
755 		}
756 	}
757 
758 	qpair->in_completion_context = 1;
759 	ret = nvme_transport_qpair_process_completions(qpair, max_completions);
760 	if (ret < 0) {
761 		if (ret == -ENXIO && nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
762 			ret = 0;
763 		} else {
764 			SPDK_ERRLOG("CQ transport error %d (%s) on qpair id %hu\n",
765 				    ret, spdk_strerror(-ret), qpair->id);
766 			if (nvme_qpair_is_admin_queue(qpair)) {
767 				nvme_ctrlr_fail(qpair->ctrlr, false);
768 			}
769 		}
770 	}
771 	qpair->in_completion_context = 0;
772 	if (qpair->delete_after_completion_context) {
773 		/*
774 		 * A request to delete this qpair was made in the context of this completion
775 		 *  routine - so it is safe to delete it now.
776 		 */
777 		spdk_nvme_ctrlr_free_io_qpair(qpair);
778 		return ret;
779 	}
780 
781 	/*
782 	 * At this point, ret must represent the number of completions we reaped.
783 	 * submit as many queued requests as we completed.
784 	 */
785 	if (ret > 0) {
786 		nvme_qpair_resubmit_requests(qpair, ret);
787 	}
788 
789 	/* Complete any pending register operations */
790 	if (nvme_qpair_is_admin_queue(qpair)) {
791 		nvme_complete_register_operations(qpair);
792 	}
793 
794 	return ret;
795 }
796 
/*
 * Return the value currently stored in qpair->transport_failure_reason.
 * The qpair is only read, never modified.
 */
797 spdk_nvme_qp_failure_reason
798 spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair)
799 {
800 	return qpair->transport_failure_reason;
801 }
802 
803 int
804 nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
805 		struct spdk_nvme_ctrlr *ctrlr,
806 		enum spdk_nvme_qprio qprio,
807 		uint32_t num_requests, bool async)
808 {
809 	size_t req_size_padded;
810 	uint32_t i;
811 
812 	qpair->id = id;
813 	qpair->qprio = qprio;
814 
815 	qpair->in_completion_context = 0;
816 	qpair->delete_after_completion_context = 0;
817 	qpair->no_deletion_notification_needed = 0;
818 
819 	qpair->ctrlr = ctrlr;
820 	qpair->trtype = ctrlr->trid.trtype;
821 	qpair->is_new_qpair = true;
822 	qpair->async = async;
823 	qpair->poll_status = NULL;
824 
825 	STAILQ_INIT(&qpair->free_req);
826 	STAILQ_INIT(&qpair->queued_req);
827 	STAILQ_INIT(&qpair->aborting_queued_req);
828 	TAILQ_INIT(&qpair->err_cmd_head);
829 	STAILQ_INIT(&qpair->err_req_head);
830 
831 	req_size_padded = (sizeof(struct nvme_request) + 63) & ~(size_t)63;
832 
833 	/* Add one for the reserved_req */
834 	num_requests++;
835 
836 	qpair->req_buf = spdk_zmalloc(req_size_padded * num_requests, 64, NULL,
837 				      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
838 	if (qpair->req_buf == NULL) {
839 		SPDK_ERRLOG("no memory to allocate qpair(cntlid:0x%x sqid:%d) req_buf with %d request\n",
840 			    ctrlr->cntlid, qpair->id, num_requests);
841 		return -ENOMEM;
842 	}
843 
844 	for (i = 0; i < num_requests; i++) {
845 		struct nvme_request *req = qpair->req_buf + i * req_size_padded;
846 
847 		req->qpair = qpair;
848 		if (i == 0) {
849 			qpair->reserved_req = req;
850 		} else {
851 			STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq);
852 		}
853 	}
854 
855 	return 0;
856 }
857 
858 void
859 nvme_qpair_complete_error_reqs(struct spdk_nvme_qpair *qpair)
860 {
861 	struct nvme_request		*req;
862 
863 	while (!STAILQ_EMPTY(&qpair->err_req_head)) {
864 		req = STAILQ_FIRST(&qpair->err_req_head);
865 		STAILQ_REMOVE_HEAD(&qpair->err_req_head, stailq);
866 		nvme_qpair_manual_complete_request(qpair, req,
867 						   req->cpl.status.sct,
868 						   req->cpl.status.sc, 0, true);
869 	}
870 }
871 
872 void
873 nvme_qpair_deinit(struct spdk_nvme_qpair *qpair)
874 {
875 	struct nvme_error_cmd *cmd, *entry;
876 
877 	nvme_qpair_abort_queued_reqs(qpair, 0);
878 	_nvme_qpair_complete_abort_queued_reqs(qpair);
879 	nvme_qpair_complete_error_reqs(qpair);
880 
881 	TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) {
882 		TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link);
883 		spdk_free(cmd);
884 	}
885 
886 	spdk_free(qpair->req_buf);
887 }
888 
889 static inline int
890 _nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
891 {
892 	int			rc = 0;
893 	struct nvme_request	*child_req, *tmp;
894 	struct nvme_error_cmd	*cmd;
895 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
896 	bool			child_req_failed = false;
897 
898 	nvme_qpair_check_enabled(qpair);
899 
900 	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED ||
901 			  nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING ||
902 			  nvme_qpair_get_state(qpair) == NVME_QPAIR_DESTROYING)) {
903 		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
904 			nvme_request_remove_child(req, child_req);
905 			nvme_request_free_children(child_req);
906 			nvme_free_request(child_req);
907 		}
908 		if (req->parent != NULL) {
909 			nvme_request_remove_child(req->parent, req);
910 		}
911 		nvme_free_request(req);
912 		return -ENXIO;
913 	}
914 
915 	if (req->num_children) {
916 		/*
917 		 * This is a split (parent) request. Submit all of the children but not the parent
918 		 * request itself, since the parent is the original unsplit request.
919 		 */
920 		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
921 			if (spdk_likely(!child_req_failed)) {
922 				rc = nvme_qpair_submit_request(qpair, child_req);
923 				if (spdk_unlikely(rc != 0)) {
924 					child_req_failed = true;
925 				}
926 			} else { /* free remaining child_reqs since one child_req fails */
927 				nvme_request_remove_child(req, child_req);
928 				nvme_request_free_children(child_req);
929 				nvme_free_request(child_req);
930 			}
931 		}
932 
933 		if (spdk_unlikely(child_req_failed)) {
934 			/* part of children requests have been submitted,
935 			 * return success since we must wait for those children to complete,
936 			 * but set the parent request to failure.
937 			 */
938 			if (req->num_children) {
939 				req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
940 				req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
941 				return 0;
942 			}
943 			goto error;
944 		}
945 
946 		return rc;
947 	}
948 
949 	/* queue those requests which matches with opcode in err_cmd list */
950 	if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head))) {
951 		TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) {
952 			if (!cmd->do_not_submit) {
953 				continue;
954 			}
955 
956 			if ((cmd->opc == req->cmd.opc) && cmd->err_count) {
957 				/* add to error request list and set cpl */
958 				req->timeout_tsc = cmd->timeout_tsc;
959 				req->submit_tick = spdk_get_ticks();
960 				req->cpl.status.sct = cmd->status.sct;
961 				req->cpl.status.sc = cmd->status.sc;
962 				STAILQ_INSERT_TAIL(&qpair->err_req_head, req, stailq);
963 				cmd->err_count--;
964 				return 0;
965 			}
966 		}
967 	}
968 
969 	if (spdk_unlikely(ctrlr->is_failed)) {
970 		rc = -ENXIO;
971 		goto error;
972 	}
973 
974 	/* assign submit_tick before submitting req to specific transport */
975 	if (spdk_unlikely(ctrlr->timeout_enabled)) {
976 		if (req->submit_tick == 0) { /* req submitted for the first time */
977 			req->submit_tick = spdk_get_ticks();
978 			req->timed_out = false;
979 		}
980 	} else {
981 		req->submit_tick = 0;
982 	}
983 
984 	/* Allow two cases:
985 	 * 1. NVMe qpair is enabled.
986 	 * 2. Always allow fabrics commands through - these get
987 	 * the controller out of reset state.
988 	 */
989 	if (spdk_likely(nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED) ||
990 	    (req->cmd.opc == SPDK_NVME_OPC_FABRIC &&
991 	     nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
992 		rc = nvme_transport_qpair_submit_request(qpair, req);
993 	} else {
994 		/* The controller is being reset - queue this request and
995 		 *  submit it later when the reset is completed.
996 		 */
997 		return -EAGAIN;
998 	}
999 
1000 	if (spdk_likely(rc == 0)) {
1001 		if (SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
1002 			spdk_nvme_print_command(qpair->id, &req->cmd);
1003 		}
1004 		req->queued = false;
1005 		return 0;
1006 	}
1007 
1008 	if (rc == -EAGAIN) {
1009 		return -EAGAIN;
1010 	}
1011 
1012 error:
1013 	if (req->parent != NULL) {
1014 		nvme_request_remove_child(req->parent, req);
1015 	}
1016 
1017 	/* The request is from queued_req list we should trigger the callback from caller */
1018 	if (spdk_unlikely(req->queued)) {
1019 		nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
1020 						   SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, true, true);
1021 		return rc;
1022 	}
1023 
1024 	nvme_free_request(req);
1025 
1026 	return rc;
1027 }
1028 
1029 int
1030 nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1031 {
1032 	int rc;
1033 
1034 	if (spdk_unlikely(!STAILQ_EMPTY(&qpair->queued_req) && req->num_children == 0)) {
1035 		/*
1036 		 * Requests that have no children should be sent to the transport after all
1037 		 * currently queued requests. Requests with children will be split and go back
1038 		 * through this path.  We need to make an exception for the fabrics commands
1039 		 * while the qpair is connecting to be able to send the connect command
1040 		 * asynchronously.
1041 		 */
1042 		if (req->cmd.opc != SPDK_NVME_OPC_FABRIC ||
1043 		    nvme_qpair_get_state(qpair) != NVME_QPAIR_CONNECTING) {
1044 			STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
1045 			req->queued = true;
1046 			return 0;
1047 		}
1048 	}
1049 
1050 	rc = _nvme_qpair_submit_request(qpair, req);
1051 	if (rc == -EAGAIN) {
1052 		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
1053 		req->queued = true;
1054 		rc = 0;
1055 	}
1056 
1057 	return rc;
1058 }
1059 
1060 static int
1061 nvme_qpair_resubmit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1062 {
1063 	int rc;
1064 
1065 	/*
1066 	 * We should never have a request with children on the queue.
1067 	 * This is necessary to preserve the 1:1 relationship between
1068 	 * completions and resubmissions.
1069 	 */
1070 	assert(req->num_children == 0);
1071 	assert(req->queued);
1072 	rc = _nvme_qpair_submit_request(qpair, req);
1073 	if (spdk_unlikely(rc == -EAGAIN)) {
1074 		STAILQ_INSERT_HEAD(&qpair->queued_req, req, stailq);
1075 	}
1076 
1077 	return rc;
1078 }
1079 
/*
 * Flush the requests this qpair is still holding in software.
 *
 * The calls run in a fixed order: requests parked on the error-injection
 * list (err_req_head) are completed first, then dnr is passed to
 * nvme_qpair_abort_queued_reqs(), and finally
 * _nvme_qpair_complete_abort_queued_reqs() is called.
 * NOTE(review): dnr is presumably the "do not retry" value for the aborted
 * requests - confirm against nvme_qpair_abort_queued_reqs().
 */
1080 void
1081 nvme_qpair_abort_all_queued_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
1082 {
1083 	nvme_qpair_complete_error_reqs(qpair);
1084 	nvme_qpair_abort_queued_reqs(qpair, dnr);
1085 	_nvme_qpair_complete_abort_queued_reqs(qpair);
1086 }
1087 
1088 int
1089 spdk_nvme_qpair_add_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
1090 					struct spdk_nvme_qpair *qpair,
1091 					uint8_t opc, bool do_not_submit,
1092 					uint64_t timeout_in_us,
1093 					uint32_t err_count,
1094 					uint8_t sct, uint8_t sc)
1095 {
1096 	struct nvme_error_cmd *entry, *cmd = NULL;
1097 	int rc = 0;
1098 
1099 	if (qpair == NULL) {
1100 		qpair = ctrlr->adminq;
1101 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1102 	}
1103 
1104 	TAILQ_FOREACH(entry, &qpair->err_cmd_head, link) {
1105 		if (entry->opc == opc) {
1106 			cmd = entry;
1107 			break;
1108 		}
1109 	}
1110 
1111 	if (cmd == NULL) {
1112 		cmd = spdk_zmalloc(sizeof(*cmd), 64, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
1113 		if (!cmd) {
1114 			rc = -ENOMEM;
1115 			goto out;
1116 		}
1117 		TAILQ_INSERT_TAIL(&qpair->err_cmd_head, cmd, link);
1118 	}
1119 
1120 	cmd->do_not_submit = do_not_submit;
1121 	cmd->err_count = err_count;
1122 	cmd->timeout_tsc = timeout_in_us * spdk_get_ticks_hz() / 1000000ULL;
1123 	cmd->opc = opc;
1124 	cmd->status.sct = sct;
1125 	cmd->status.sc = sc;
1126 out:
1127 	if (nvme_qpair_is_admin_queue(qpair)) {
1128 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1129 	}
1130 
1131 	return rc;
1132 }
1133 
1134 void
1135 spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
1136 		struct spdk_nvme_qpair *qpair,
1137 		uint8_t opc)
1138 {
1139 	struct nvme_error_cmd *cmd, *entry;
1140 
1141 	if (qpair == NULL) {
1142 		qpair = ctrlr->adminq;
1143 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1144 	}
1145 
1146 	TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) {
1147 		if (cmd->opc == opc) {
1148 			TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link);
1149 			spdk_free(cmd);
1150 			break;
1151 		}
1152 	}
1153 
1154 	if (nvme_qpair_is_admin_queue(qpair)) {
1155 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1156 	}
1157 }
1158 
/*
 * Return the id stored in the qpair (the value assigned to qpair->id by
 * nvme_qpair_init()). The qpair is only read, never modified.
 */
1159 uint16_t
1160 spdk_nvme_qpair_get_id(struct spdk_nvme_qpair *qpair)
1161 {
1162 	return qpair->id;
1163 }
1164