xref: /spdk/lib/nvme/nvme_qpair.c (revision 28b353a57912106cce63632d5f2a23b5a8af4616)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2015 Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "nvme_internal.h"
8 #include "spdk/nvme_ocssd.h"
9 #include "spdk/string.h"
10 
11 #define NVME_CMD_DPTR_STR_SIZE 256
12 
13 static int nvme_qpair_resubmit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);
14 
/*
 * One row of a value-to-name lookup table.
 *
 * Tables of these rows are terminated by a row whose value is 0xFFFF; that
 * row's string is what nvme_get_string() returns when nothing else matches.
 */
struct nvme_string {
	/* Numeric code compared against the value being looked up. */
	uint16_t	value;
	/* Printable name for the code. */
	const char	*str;
};
19 
20 static const struct nvme_string admin_opcode[] = {
21 	{ SPDK_NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
22 	{ SPDK_NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
23 	{ SPDK_NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
24 	{ SPDK_NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
25 	{ SPDK_NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
26 	{ SPDK_NVME_OPC_IDENTIFY, "IDENTIFY" },
27 	{ SPDK_NVME_OPC_ABORT, "ABORT" },
28 	{ SPDK_NVME_OPC_SET_FEATURES, "SET FEATURES" },
29 	{ SPDK_NVME_OPC_GET_FEATURES, "GET FEATURES" },
30 	{ SPDK_NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
31 	{ SPDK_NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" },
32 	{ SPDK_NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" },
33 	{ SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
34 	{ SPDK_NVME_OPC_DEVICE_SELF_TEST, "DEVICE SELF-TEST" },
35 	{ SPDK_NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" },
36 	{ SPDK_NVME_OPC_KEEP_ALIVE, "KEEP ALIVE" },
37 	{ SPDK_NVME_OPC_DIRECTIVE_SEND, "DIRECTIVE SEND" },
38 	{ SPDK_NVME_OPC_DIRECTIVE_RECEIVE, "DIRECTIVE RECEIVE" },
39 	{ SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT, "VIRTUALIZATION MANAGEMENT" },
40 	{ SPDK_NVME_OPC_NVME_MI_SEND, "NVME-MI SEND" },
41 	{ SPDK_NVME_OPC_NVME_MI_RECEIVE, "NVME-MI RECEIVE" },
42 	{ SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG, "DOORBELL BUFFER CONFIG" },
43 	{ SPDK_NVME_OPC_FABRIC, "FABRIC" },
44 	{ SPDK_NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
45 	{ SPDK_NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
46 	{ SPDK_NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
47 	{ SPDK_NVME_OPC_SANITIZE, "SANITIZE" },
48 	{ SPDK_NVME_OPC_GET_LBA_STATUS, "GET LBA STATUS" },
49 	{ SPDK_OCSSD_OPC_GEOMETRY, "OCSSD / GEOMETRY" },
50 	{ 0xFFFF, "ADMIN COMMAND" }
51 };
52 
53 static const struct nvme_string fabric_opcode[] = {
54 	{ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET, "PROPERTY SET" },
55 	{ SPDK_NVMF_FABRIC_COMMAND_CONNECT, "CONNECT" },
56 	{ SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET, "PROPERTY GET" },
57 	{ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND, "AUTHENTICATION SEND" },
58 	{ SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV, "AUTHENTICATION RECV" },
59 	{ 0xFFFF, "RESERVED / VENDOR SPECIFIC" }
60 };
61 
62 static const struct nvme_string feat_opcode[] = {
63 	{ SPDK_NVME_FEAT_ARBITRATION, "ARBITRATION" },
64 	{ SPDK_NVME_FEAT_POWER_MANAGEMENT, "POWER MANAGEMENT" },
65 	{ SPDK_NVME_FEAT_LBA_RANGE_TYPE, "LBA RANGE TYPE" },
66 	{ SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD, "TEMPERATURE THRESHOLD" },
67 	{ SPDK_NVME_FEAT_ERROR_RECOVERY, "ERROR_RECOVERY" },
68 	{ SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE, "VOLATILE WRITE CACHE" },
69 	{ SPDK_NVME_FEAT_NUMBER_OF_QUEUES, "NUMBER OF QUEUES" },
70 	{ SPDK_NVME_FEAT_INTERRUPT_COALESCING, "INTERRUPT COALESCING" },
71 	{ SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION, "INTERRUPT VECTOR CONFIGURATION" },
72 	{ SPDK_NVME_FEAT_WRITE_ATOMICITY, "WRITE ATOMICITY" },
73 	{ SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION, "ASYNC EVENT CONFIGURATION" },
74 	{ SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION, "AUTONOMOUS POWER STATE TRANSITION" },
75 	{ SPDK_NVME_FEAT_HOST_MEM_BUFFER, "HOST MEM BUFFER" },
76 	{ SPDK_NVME_FEAT_TIMESTAMP, "TIMESTAMP" },
77 	{ SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, "KEEP ALIVE TIMER" },
78 	{ SPDK_NVME_FEAT_HOST_CONTROLLED_THERMAL_MANAGEMENT, "HOST CONTROLLED THERMAL MANAGEMENT" },
79 	{ SPDK_NVME_FEAT_NON_OPERATIONAL_POWER_STATE_CONFIG, "NON OPERATIONAL POWER STATE CONFIG" },
80 	{ SPDK_NVME_FEAT_SOFTWARE_PROGRESS_MARKER, "SOFTWARE PROGRESS MARKER" },
81 	{ SPDK_NVME_FEAT_HOST_IDENTIFIER, "HOST IDENTIFIER" },
82 	{ SPDK_NVME_FEAT_HOST_RESERVE_MASK, "HOST RESERVE MASK" },
83 	{ SPDK_NVME_FEAT_HOST_RESERVE_PERSIST, "HOST RESERVE PERSIST" },
84 	{ 0xFFFF, "RESERVED" }
85 };
86 
87 static const struct nvme_string io_opcode[] = {
88 	{ SPDK_NVME_OPC_FLUSH, "FLUSH" },
89 	{ SPDK_NVME_OPC_WRITE, "WRITE" },
90 	{ SPDK_NVME_OPC_READ, "READ" },
91 	{ SPDK_NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
92 	{ SPDK_NVME_OPC_COMPARE, "COMPARE" },
93 	{ SPDK_NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
94 	{ SPDK_NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
95 	{ SPDK_NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
96 	{ SPDK_NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
97 	{ SPDK_NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
98 	{ SPDK_NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
99 	{ SPDK_OCSSD_OPC_VECTOR_RESET, "OCSSD / VECTOR RESET" },
100 	{ SPDK_OCSSD_OPC_VECTOR_WRITE, "OCSSD / VECTOR WRITE" },
101 	{ SPDK_OCSSD_OPC_VECTOR_READ, "OCSSD / VECTOR READ" },
102 	{ SPDK_OCSSD_OPC_VECTOR_COPY, "OCSSD / VECTOR COPY" },
103 	{ 0xFFFF, "IO COMMAND" }
104 };
105 
106 static const struct nvme_string sgl_type[] = {
107 	{ SPDK_NVME_SGL_TYPE_DATA_BLOCK, "DATA BLOCK" },
108 	{ SPDK_NVME_SGL_TYPE_BIT_BUCKET, "BIT BUCKET" },
109 	{ SPDK_NVME_SGL_TYPE_SEGMENT, "SEGMENT" },
110 	{ SPDK_NVME_SGL_TYPE_LAST_SEGMENT, "LAST SEGMENT" },
111 	{ SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK, "KEYED DATA BLOCK" },
112 	{ SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK, "TRANSPORT DATA BLOCK" },
113 	{ SPDK_NVME_SGL_TYPE_VENDOR_SPECIFIC, "VENDOR SPECIFIC" },
114 	{ 0xFFFF, "RESERVED" }
115 };
116 
117 static const struct nvme_string sgl_subtype[] = {
118 	{ SPDK_NVME_SGL_SUBTYPE_ADDRESS, "ADDRESS" },
119 	{ SPDK_NVME_SGL_SUBTYPE_OFFSET, "OFFSET" },
120 	{ SPDK_NVME_SGL_SUBTYPE_TRANSPORT, "TRANSPORT" },
121 	{ SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY, "INVALIDATE KEY" },
122 	{ 0xFFFF, "RESERVED" }
123 };
124 
125 static const char *
126 nvme_get_string(const struct nvme_string *strings, uint16_t value)
127 {
128 	const struct nvme_string *entry;
129 
130 	entry = strings;
131 
132 	while (entry->value != 0xFFFF) {
133 		if (entry->value == value) {
134 			return entry->str;
135 		}
136 		entry++;
137 	}
138 	return entry->str;
139 }
140 
141 static void
142 nvme_get_sgl_unkeyed(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
143 {
144 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
145 
146 	snprintf(buf, size, " len:0x%x", sgl->unkeyed.length);
147 }
148 
149 static void
150 nvme_get_sgl_keyed(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
151 {
152 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
153 
154 	snprintf(buf, size, " len:0x%x key:0x%x", sgl->keyed.length, sgl->keyed.key);
155 }
156 
157 static void
158 nvme_get_sgl(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
159 {
160 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
161 	int c;
162 
163 	c = snprintf(buf, size, "SGL %s %s 0x%" PRIx64, nvme_get_string(sgl_type, sgl->generic.type),
164 		     nvme_get_string(sgl_subtype, sgl->generic.subtype), sgl->address);
165 	assert(c >= 0 && (size_t)c < size);
166 
167 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
168 		nvme_get_sgl_unkeyed(buf + c, size - c, cmd);
169 	}
170 
171 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
172 		nvme_get_sgl_keyed(buf + c, size - c, cmd);
173 	}
174 }
175 
176 static void
177 nvme_get_prp(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
178 {
179 	snprintf(buf, size, "PRP1 0x%" PRIx64 " PRP2 0x%" PRIx64, cmd->dptr.prp.prp1, cmd->dptr.prp.prp2);
180 }
181 
182 static void
183 nvme_get_dptr(char *buf, size_t size, struct spdk_nvme_cmd *cmd)
184 {
185 	if (spdk_nvme_opc_get_data_transfer(cmd->opc) != SPDK_NVME_DATA_NONE) {
186 		switch (cmd->psdt) {
187 		case SPDK_NVME_PSDT_PRP:
188 			nvme_get_prp(buf, size, cmd);
189 			break;
190 		case SPDK_NVME_PSDT_SGL_MPTR_CONTIG:
191 		case SPDK_NVME_PSDT_SGL_MPTR_SGL:
192 			nvme_get_sgl(buf, size, cmd);
193 			break;
194 		default:
195 			;
196 		}
197 	}
198 }
199 
200 static void
201 nvme_admin_qpair_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
202 {
203 	struct spdk_nvmf_capsule_cmd *fcmd = (void *)cmd;
204 	char dptr[NVME_CMD_DPTR_STR_SIZE] = {'\0'};
205 
206 	assert(cmd != NULL);
207 
208 	nvme_get_dptr(dptr, sizeof(dptr), cmd);
209 
210 	switch ((int)cmd->opc) {
211 	case SPDK_NVME_OPC_SET_FEATURES:
212 	case SPDK_NVME_OPC_GET_FEATURES:
213 		SPDK_NOTICELOG("%s %s cid:%d cdw10:%08x %s\n",
214 			       nvme_get_string(admin_opcode, cmd->opc), nvme_get_string(feat_opcode,
215 					       cmd->cdw10_bits.set_features.fid), cmd->cid, cmd->cdw10, dptr);
216 		break;
217 	case SPDK_NVME_OPC_FABRIC:
218 		SPDK_NOTICELOG("%s %s qid:%d cid:%d %s\n",
219 			       nvme_get_string(admin_opcode, cmd->opc), nvme_get_string(fabric_opcode, fcmd->fctype), qid,
220 			       fcmd->cid, dptr);
221 		break;
222 	default:
223 		SPDK_NOTICELOG("%s (%02x) qid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x %s\n",
224 			       nvme_get_string(admin_opcode, cmd->opc), cmd->opc, qid, cmd->cid, cmd->nsid, cmd->cdw10,
225 			       cmd->cdw11, dptr);
226 	}
227 }
228 
229 static void
230 nvme_io_qpair_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
231 {
232 	char dptr[NVME_CMD_DPTR_STR_SIZE] = {'\0'};
233 
234 	assert(cmd != NULL);
235 
236 	nvme_get_dptr(dptr, sizeof(dptr), cmd);
237 
238 	switch ((int)cmd->opc) {
239 	case SPDK_NVME_OPC_WRITE:
240 	case SPDK_NVME_OPC_READ:
241 	case SPDK_NVME_OPC_WRITE_UNCORRECTABLE:
242 	case SPDK_NVME_OPC_COMPARE:
243 		SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d "
244 			       "lba:%llu len:%d %s\n",
245 			       nvme_get_string(io_opcode, cmd->opc), qid, cmd->cid, cmd->nsid,
246 			       ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
247 			       (cmd->cdw12 & 0xFFFF) + 1, dptr);
248 		break;
249 	case SPDK_NVME_OPC_FLUSH:
250 	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
251 		SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d\n",
252 			       nvme_get_string(io_opcode, cmd->opc), qid, cmd->cid, cmd->nsid);
253 		break;
254 	default:
255 		SPDK_NOTICELOG("%s (%02x) sqid:%d cid:%d nsid:%d\n",
256 			       nvme_get_string(io_opcode, cmd->opc), cmd->opc, qid, cmd->cid, cmd->nsid);
257 		break;
258 	}
259 }
260 
261 void
262 spdk_nvme_print_command(uint16_t qid, struct spdk_nvme_cmd *cmd)
263 {
264 	assert(cmd != NULL);
265 
266 	if (qid == 0 || cmd->opc == SPDK_NVME_OPC_FABRIC) {
267 		nvme_admin_qpair_print_command(qid, cmd);
268 	} else {
269 		nvme_io_qpair_print_command(qid, cmd);
270 	}
271 }
272 
273 void
274 spdk_nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd)
275 {
276 	assert(qpair != NULL);
277 	assert(cmd != NULL);
278 
279 	spdk_nvme_print_command(qpair->id, cmd);
280 }
281 
282 static const struct nvme_string status_type[] = {
283 	{ SPDK_NVME_SCT_GENERIC, "GENERIC" },
284 	{ SPDK_NVME_SCT_COMMAND_SPECIFIC, "COMMAND SPECIFIC" },
285 	{ SPDK_NVME_SCT_MEDIA_ERROR, "MEDIA ERROR" },
286 	{ SPDK_NVME_SCT_PATH, "PATH" },
287 	{ SPDK_NVME_SCT_VENDOR_SPECIFIC, "VENDOR SPECIFIC" },
288 	{ 0xFFFF, "RESERVED" },
289 };
290 
291 static const struct nvme_string generic_status[] = {
292 	{ SPDK_NVME_SC_SUCCESS, "SUCCESS" },
293 	{ SPDK_NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
294 	{ SPDK_NVME_SC_INVALID_FIELD, "INVALID FIELD" },
295 	{ SPDK_NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
296 	{ SPDK_NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
297 	{ SPDK_NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
298 	{ SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
299 	{ SPDK_NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
300 	{ SPDK_NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
301 	{ SPDK_NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
302 	{ SPDK_NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
303 	{ SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
304 	{ SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
305 	{ SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" },
306 	{ SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" },
307 	{ SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
308 	{ SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
309 	{ SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
310 	{ SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" },
311 	{ SPDK_NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" },
312 	{ SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
313 	{ SPDK_NVME_SC_OPERATION_DENIED, "OPERATION DENIED" },
314 	{ SPDK_NVME_SC_INVALID_SGL_OFFSET, "INVALID SGL OFFSET" },
315 	{ SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT, "HOSTID INCONSISTENT FORMAT" },
316 	{ SPDK_NVME_SC_KEEP_ALIVE_EXPIRED, "KEEP ALIVE EXPIRED" },
317 	{ SPDK_NVME_SC_KEEP_ALIVE_INVALID, "KEEP ALIVE INVALID" },
318 	{ SPDK_NVME_SC_ABORTED_PREEMPT, "ABORTED - PREEMPT AND ABORT" },
319 	{ SPDK_NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" },
320 	{ SPDK_NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" },
321 	{ SPDK_NVME_SC_SGL_DATA_BLOCK_GRANULARITY_INVALID, "DATA BLOCK GRANULARITY INVALID" },
322 	{ SPDK_NVME_SC_COMMAND_INVALID_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" },
323 	{ SPDK_NVME_SC_COMMAND_NAMESPACE_IS_PROTECTED, "COMMAND NAMESPACE IS PROTECTED" },
324 	{ SPDK_NVME_SC_COMMAND_INTERRUPTED, "COMMAND INTERRUPTED" },
325 	{ SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR, "COMMAND TRANSIENT TRANSPORT ERROR" },
326 	{ SPDK_NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
327 	{ SPDK_NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
328 	{ SPDK_NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
329 	{ SPDK_NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
330 	{ SPDK_NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
331 	{ SPDK_NVME_SC_INVALID_VALUE_SIZE, "INVALID VALUE SIZE" },
332 	{ SPDK_NVME_SC_INVALID_KEY_SIZE, "INVALID KEY SIZE" },
333 	{ SPDK_NVME_SC_KV_KEY_DOES_NOT_EXIST, "KV KEY DOES NOT EXIST" },
334 	{ SPDK_NVME_SC_UNRECOVERED_ERROR, "UNRECOVERED ERROR" },
335 	{ SPDK_NVME_SC_KEY_EXISTS, "KEY EXISTS" },
336 	{ 0xFFFF, "GENERIC" }
337 };
338 
339 static const struct nvme_string command_specific_status[] = {
340 	{ SPDK_NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
341 	{ SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
342 	{ SPDK_NVME_SC_INVALID_QUEUE_SIZE, "INVALID QUEUE SIZE" },
343 	{ SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
344 	{ SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
345 	{ SPDK_NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
346 	{ SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
347 	{ SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
348 	{ SPDK_NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
349 	{ SPDK_NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
350 	{ SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" },
351 	{ SPDK_NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
352 	{ SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" },
353 	{ SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
354 	{ SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
355 	{ SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" },
356 	{ SPDK_NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" },
357 	{ SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" },
358 	{ SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
359 	{ SPDK_NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
360 	{ SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
361 	{ SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" },
362 	{ SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
363 	{ SPDK_NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
364 	{ SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" },
365 	{ SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" },
366 	{ SPDK_NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" },
367 	{ SPDK_NVME_SC_DEVICE_SELF_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" },
368 	{ SPDK_NVME_SC_BOOT_PARTITION_WRITE_PROHIBITED, "BOOT PARTITION WRITE PROHIBITED" },
369 	{ SPDK_NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER ID" },
370 	{ SPDK_NVME_SC_INVALID_SECONDARY_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" },
371 	{ SPDK_NVME_SC_INVALID_NUM_CTRLR_RESOURCES, "INVALID NUMBER OF CONTROLLER RESOURCES" },
372 	{ SPDK_NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" },
373 	{ SPDK_NVME_SC_SANITIZE_PROHIBITED, "SANITIZE PROHIBITED" },
374 	{ SPDK_NVME_SC_ANA_GROUP_IDENTIFIER_INVALID, "ANA GROUP IDENTIFIER INVALID" },
375 	{ SPDK_NVME_SC_ANA_ATTACH_FAILED, "ANA ATTACH FAILED" },
376 	{ SPDK_NVME_SC_INSUFFICIENT_CAPACITY, "INSUFFICIENT CAPACITY" },
377 	{ SPDK_NVME_SC_NAMESPACE_ATTACH_LIMIT_EXCEEDED, "NAMESPACE ATTACH LIMIT EXCEEDED" },
378 	{ SPDK_NVME_SC_PROHIBIT_CMD_EXEC_NOT_SUPPORTED, "PROHIBIT COMMAND EXEC NOT SUPPORTED" },
379 	{ SPDK_NVME_SC_IOCS_NOT_SUPPORTED, "IOCS NOT SUPPORTED" },
380 	{ SPDK_NVME_SC_IOCS_NOT_ENABLED, "IOCS NOT ENABLED" },
381 	{ SPDK_NVME_SC_IOCS_COMBINATION_REJECTED, "IOCS COMBINATION REJECTED" },
382 	{ SPDK_NVME_SC_INVALID_IOCS, "INVALID IOCS" },
383 	{ SPDK_NVME_SC_IDENTIFIER_UNAVAILABLE, "IDENTIFIER UNAVAILABLE" },
384 	{ SPDK_NVME_SC_STREAM_RESOURCE_ALLOCATION_FAILED, "STREAM RESOURCE ALLOCATION FAILED"},
385 	{ SPDK_NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
386 	{ SPDK_NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
387 	{ SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_RANGE, "WRITE TO RO RANGE" },
388 	{ SPDK_NVME_SC_CMD_SIZE_LIMIT_SIZE_EXCEEDED, "CMD SIZE LIMIT SIZE EXCEEDED" },
389 	{ SPDK_NVME_SC_ZONED_BOUNDARY_ERROR, "ZONED BOUNDARY ERROR" },
390 	{ SPDK_NVME_SC_ZONE_IS_FULL, "ZONE IS FULL" },
391 	{ SPDK_NVME_SC_ZONE_IS_READ_ONLY, "ZONE IS READ ONLY" },
392 	{ SPDK_NVME_SC_ZONE_IS_OFFLINE, "ZONE IS OFFLINE" },
393 	{ SPDK_NVME_SC_ZONE_INVALID_WRITE, "ZONE INVALID WRITE" },
394 	{ SPDK_NVME_SC_TOO_MANY_ACTIVE_ZONES, "TOO MANY ACTIVE ZONES" },
395 	{ SPDK_NVME_SC_TOO_MANY_OPEN_ZONES, "TOO MANY OPEN ZONES" },
396 	{ SPDK_NVME_SC_INVALID_ZONE_STATE_TRANSITION, "INVALID ZONE STATE TRANSITION" },
397 	{ 0xFFFF, "COMMAND SPECIFIC" }
398 };
399 
400 static const struct nvme_string media_error_status[] = {
401 	{ SPDK_NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
402 	{ SPDK_NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
403 	{ SPDK_NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
404 	{ SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
405 	{ SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
406 	{ SPDK_NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
407 	{ SPDK_NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
408 	{ SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" },
409 	{ SPDK_NVME_SC_END_TO_END_STORAGE_TAG_CHECK_ERROR, "END TO END STORAGE TAG CHECK ERROR" },
410 	{ SPDK_OCSSD_SC_OFFLINE_CHUNK, "RESET OFFLINE CHUNK" },
411 	{ SPDK_OCSSD_SC_INVALID_RESET, "INVALID RESET" },
412 	{ SPDK_OCSSD_SC_WRITE_FAIL_WRITE_NEXT_UNIT, "WRITE FAIL WRITE NEXT UNIT" },
413 	{ SPDK_OCSSD_SC_WRITE_FAIL_CHUNK_EARLY_CLOSE, "WRITE FAIL CHUNK EARLY CLOSE" },
414 	{ SPDK_OCSSD_SC_OUT_OF_ORDER_WRITE, "OUT OF ORDER WRITE" },
415 	{ SPDK_OCSSD_SC_READ_HIGH_ECC, "READ HIGH ECC" },
416 	{ 0xFFFF, "MEDIA ERROR" }
417 };
418 
419 static const struct nvme_string path_status[] = {
420 	{ SPDK_NVME_SC_INTERNAL_PATH_ERROR, "INTERNAL PATH ERROR" },
421 	{ SPDK_NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS, "ASYMMETRIC ACCESS PERSISTENT LOSS" },
422 	{ SPDK_NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE, "ASYMMETRIC ACCESS INACCESSIBLE" },
423 	{ SPDK_NVME_SC_ASYMMETRIC_ACCESS_TRANSITION, "ASYMMETRIC ACCESS TRANSITION" },
424 	{ SPDK_NVME_SC_CONTROLLER_PATH_ERROR, "CONTROLLER PATH ERROR" },
425 	{ SPDK_NVME_SC_HOST_PATH_ERROR, "HOST PATH ERROR" },
426 	{ SPDK_NVME_SC_ABORTED_BY_HOST, "ABORTED BY HOST" },
427 	{ 0xFFFF, "PATH ERROR" }
428 };
429 
430 const char *
431 spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status)
432 {
433 	const struct nvme_string *entry;
434 
435 	switch (status->sct) {
436 	case SPDK_NVME_SCT_GENERIC:
437 		entry = generic_status;
438 		break;
439 	case SPDK_NVME_SCT_COMMAND_SPECIFIC:
440 		entry = command_specific_status;
441 		break;
442 	case SPDK_NVME_SCT_MEDIA_ERROR:
443 		entry = media_error_status;
444 		break;
445 	case SPDK_NVME_SCT_PATH:
446 		entry = path_status;
447 		break;
448 	case SPDK_NVME_SCT_VENDOR_SPECIFIC:
449 		return "VENDOR SPECIFIC";
450 	default:
451 		return "RESERVED";
452 	}
453 
454 	return nvme_get_string(entry, status->sc);
455 }
456 
457 const char *
458 spdk_nvme_cpl_get_status_type_string(const struct spdk_nvme_status *status)
459 {
460 	return nvme_get_string(status_type, status->sct);
461 }
462 
463 void
464 spdk_nvme_print_completion(uint16_t qid, struct spdk_nvme_cpl *cpl)
465 {
466 	assert(cpl != NULL);
467 
468 	/* Check that sqid matches qid. Note that sqid is reserved
469 	 * for fabrics so don't print an error when sqid is 0. */
470 	if (cpl->sqid != qid && cpl->sqid != 0) {
471 		SPDK_ERRLOG("sqid %u doesn't match qid\n", cpl->sqid);
472 	}
473 
474 	SPDK_NOTICELOG("%s (%02x/%02x) qid:%d cid:%d cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n",
475 		       spdk_nvme_cpl_get_status_string(&cpl->status),
476 		       cpl->status.sct, cpl->status.sc, qid, cpl->cid, cpl->cdw0,
477 		       cpl->sqhd, cpl->status.p, cpl->status.m, cpl->status.dnr);
478 }
479 
480 void
481 spdk_nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl)
482 {
483 	spdk_nvme_print_completion(qpair->id, cpl);
484 }
485 
486 bool
487 nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl)
488 {
489 	/*
490 	 * TODO: spec is not clear how commands that are aborted due
491 	 *  to TLER will be marked.  So for now, it seems
492 	 *  NAMESPACE_NOT_READY is the only case where we should
493 	 *  look at the DNR bit.
494 	 */
495 	switch ((int)cpl->status.sct) {
496 	case SPDK_NVME_SCT_GENERIC:
497 		switch ((int)cpl->status.sc) {
498 		case SPDK_NVME_SC_NAMESPACE_NOT_READY:
499 		case SPDK_NVME_SC_FORMAT_IN_PROGRESS:
500 			if (cpl->status.dnr) {
501 				return false;
502 			} else {
503 				return true;
504 			}
505 		case SPDK_NVME_SC_INVALID_OPCODE:
506 		case SPDK_NVME_SC_INVALID_FIELD:
507 		case SPDK_NVME_SC_COMMAND_ID_CONFLICT:
508 		case SPDK_NVME_SC_DATA_TRANSFER_ERROR:
509 		case SPDK_NVME_SC_ABORTED_POWER_LOSS:
510 		case SPDK_NVME_SC_INTERNAL_DEVICE_ERROR:
511 		case SPDK_NVME_SC_ABORTED_BY_REQUEST:
512 		case SPDK_NVME_SC_ABORTED_SQ_DELETION:
513 		case SPDK_NVME_SC_ABORTED_FAILED_FUSED:
514 		case SPDK_NVME_SC_ABORTED_MISSING_FUSED:
515 		case SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
516 		case SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR:
517 		case SPDK_NVME_SC_LBA_OUT_OF_RANGE:
518 		case SPDK_NVME_SC_CAPACITY_EXCEEDED:
519 		default:
520 			return false;
521 		}
522 	case SPDK_NVME_SCT_PATH:
523 		/*
524 		 * Per NVMe TP 4028 (Path and Transport Error Enhancements), retries should be
525 		 * based on the setting of the DNR bit for Internal Path Error
526 		 */
527 		switch ((int)cpl->status.sc) {
528 		case SPDK_NVME_SC_INTERNAL_PATH_ERROR:
529 			return !cpl->status.dnr;
530 		default:
531 			return false;
532 		}
533 	case SPDK_NVME_SCT_COMMAND_SPECIFIC:
534 	case SPDK_NVME_SCT_MEDIA_ERROR:
535 	case SPDK_NVME_SCT_VENDOR_SPECIFIC:
536 	default:
537 		return false;
538 	}
539 }
540 
541 static void
542 nvme_qpair_manual_complete_request(struct spdk_nvme_qpair *qpair,
543 				   struct nvme_request *req, uint32_t sct, uint32_t sc,
544 				   uint32_t dnr, bool print_on_error)
545 {
546 	struct spdk_nvme_cpl	cpl;
547 	bool			error;
548 
549 	memset(&cpl, 0, sizeof(cpl));
550 	cpl.sqid = qpair->id;
551 	cpl.status.sct = sct;
552 	cpl.status.sc = sc;
553 	cpl.status.dnr = dnr;
554 
555 	error = spdk_nvme_cpl_is_error(&cpl);
556 
557 	if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
558 		SPDK_NOTICELOG("Command completed manually:\n");
559 		spdk_nvme_qpair_print_command(qpair, &req->cmd);
560 		spdk_nvme_qpair_print_completion(qpair, &cpl);
561 	}
562 
563 	nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &cpl);
564 }
565 
566 void
567 nvme_qpair_abort_queued_reqs(struct spdk_nvme_qpair *qpair)
568 {
569 	struct nvme_request		*req;
570 	STAILQ_HEAD(, nvme_request)	tmp;
571 
572 	STAILQ_INIT(&tmp);
573 	STAILQ_SWAP(&tmp, &qpair->queued_req, nvme_request);
574 
575 	while (!STAILQ_EMPTY(&tmp)) {
576 		req = STAILQ_FIRST(&tmp);
577 		STAILQ_REMOVE_HEAD(&tmp, stailq);
578 		if (!qpair->ctrlr->opts.disable_error_logging) {
579 			SPDK_ERRLOG("aborting queued i/o\n");
580 		}
581 		nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
582 						   SPDK_NVME_SC_ABORTED_SQ_DELETION, qpair->abort_dnr, true);
583 	}
584 }
585 
586 /* The callback to a request may submit the next request which is queued and
587  * then the same callback may abort it immediately. This repetition may cause
588  * infinite recursive calls. Hence move aborting requests to another list here
589  * and abort them later at resubmission.
590  */
591 static void
592 _nvme_qpair_complete_abort_queued_reqs(struct spdk_nvme_qpair *qpair)
593 {
594 	struct nvme_request		*req;
595 	STAILQ_HEAD(, nvme_request)	tmp;
596 
597 	if (spdk_likely(STAILQ_EMPTY(&qpair->aborting_queued_req))) {
598 		return;
599 	}
600 
601 	STAILQ_INIT(&tmp);
602 	STAILQ_SWAP(&tmp, &qpair->aborting_queued_req, nvme_request);
603 
604 	while (!STAILQ_EMPTY(&tmp)) {
605 		req = STAILQ_FIRST(&tmp);
606 		STAILQ_REMOVE_HEAD(&tmp, stailq);
607 		nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
608 						   SPDK_NVME_SC_ABORTED_BY_REQUEST, 1, true);
609 	}
610 }
611 
612 uint32_t
613 nvme_qpair_abort_queued_reqs_with_cbarg(struct spdk_nvme_qpair *qpair, void *cmd_cb_arg)
614 {
615 	struct nvme_request	*req, *tmp;
616 	uint32_t		aborting = 0;
617 
618 	STAILQ_FOREACH_SAFE(req, &qpair->queued_req, stailq, tmp) {
619 		if (!nvme_request_abort_match(req, cmd_cb_arg)) {
620 			continue;
621 		}
622 
623 		STAILQ_REMOVE(&qpair->queued_req, req, nvme_request, stailq);
624 		STAILQ_INSERT_TAIL(&qpair->aborting_queued_req, req, stailq);
625 		if (!qpair->ctrlr->opts.disable_error_logging) {
626 			SPDK_ERRLOG("aborting queued i/o\n");
627 		}
628 		aborting++;
629 	}
630 
631 	return aborting;
632 }
633 
634 static inline bool
635 nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair)
636 {
637 	struct nvme_request *req;
638 
639 	/*
640 	 * Either during initial connect or reset, the qpair should follow the given state machine.
641 	 * QPAIR_DISABLED->QPAIR_CONNECTING->QPAIR_CONNECTED->QPAIR_ENABLING->QPAIR_ENABLED. In the
642 	 * reset case, once the qpair is properly connected, we need to abort any outstanding requests
643 	 * from the old transport connection and encourage the application to retry them. We also need
644 	 * to submit any queued requests that built up while we were in the connected or enabling state.
645 	 */
646 	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED &&
647 			  !qpair->ctrlr->is_resetting)) {
648 		nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING);
649 		/*
650 		 * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset
651 		 * but we have historically not disconnected pcie qpairs during reset so we have to abort requests
652 		 * here.
653 		 */
654 		if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE &&
655 		    !qpair->is_new_qpair) {
656 			nvme_qpair_abort_all_queued_reqs(qpair);
657 			nvme_transport_qpair_abort_reqs(qpair);
658 		}
659 
660 		nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLED);
661 		while (!STAILQ_EMPTY(&qpair->queued_req)) {
662 			req = STAILQ_FIRST(&qpair->queued_req);
663 			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
664 			if (nvme_qpair_resubmit_request(qpair, req)) {
665 				break;
666 			}
667 		}
668 	}
669 
670 	/*
671 	 * When doing a reset, we must disconnect the qpair on the proper core.
672 	 * Note, reset is the only case where we set the failure reason without
673 	 * setting the qpair state since reset is done at the generic layer on the
674 	 * controller thread and we can't disconnect I/O qpairs from the controller
675 	 * thread.
676 	 */
677 	if (spdk_unlikely(qpair->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE &&
678 			  nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED)) {
679 		/* Don't disconnect PCIe qpairs. They are a special case for reset. */
680 		if (qpair->ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
681 			nvme_ctrlr_disconnect_qpair(qpair);
682 		}
683 		if (qpair->transport_failure_reason == SPDK_NVME_QPAIR_FAILURE_RESET) {
684 			/*
685 			 * For multi-process, a synchronous reset may not reconnect
686 			 * foreign IO qpairs. So we will reconnect them here instead.
687 			 */
688 			nvme_ctrlr_reinitialize_io_qpair(qpair->ctrlr, qpair);
689 		}
690 		return false;
691 	}
692 
693 	return nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED;
694 }
695 
696 void
697 nvme_qpair_resubmit_requests(struct spdk_nvme_qpair *qpair, uint32_t num_requests)
698 {
699 	uint32_t i;
700 	int resubmit_rc;
701 	struct nvme_request *req;
702 
703 	assert(num_requests > 0);
704 
705 	for (i = 0; i < num_requests; i++) {
706 		if (qpair->ctrlr->is_resetting) {
707 			break;
708 		}
709 		if ((req = STAILQ_FIRST(&qpair->queued_req)) == NULL) {
710 			break;
711 		}
712 		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
713 		resubmit_rc = nvme_qpair_resubmit_request(qpair, req);
714 		if (spdk_unlikely(resubmit_rc != 0)) {
715 			SPDK_DEBUGLOG(nvme, "Unable to resubmit as many requests as we completed.\n");
716 			break;
717 		}
718 	}
719 
720 	_nvme_qpair_complete_abort_queued_reqs(qpair);
721 }
722 
723 static void
724 nvme_complete_register_operations(struct spdk_nvme_qpair *qpair)
725 {
726 	struct nvme_register_completion *ctx, *tmp;
727 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
728 	STAILQ_HEAD(, nvme_register_completion) operations;
729 
730 	STAILQ_INIT(&operations);
731 	nvme_ctrlr_lock(ctrlr);
732 	STAILQ_FOREACH_SAFE(ctx, &ctrlr->register_operations, stailq, tmp) {
733 		/* We need to make sure we complete the register operation in
734 		 * the correct process.
735 		 */
736 		if (ctx->pid != getpid()) {
737 			continue;
738 		}
739 		STAILQ_REMOVE(&ctrlr->register_operations, ctx, nvme_register_completion, stailq);
740 		STAILQ_INSERT_TAIL(&operations, ctx, stailq);
741 	}
742 	nvme_ctrlr_unlock(ctrlr);
743 
744 	while (!STAILQ_EMPTY(&operations)) {
745 		ctx = STAILQ_FIRST(&operations);
746 		STAILQ_REMOVE_HEAD(&operations, stailq);
747 		if (ctx->cb_fn != NULL) {
748 			ctx->cb_fn(ctx->cb_ctx, ctx->value, &ctx->cpl);
749 		}
750 		spdk_free(ctx);
751 	}
752 }
753 
754 int
755 spdk_nvme_qpair_get_fd(struct spdk_nvme_qpair *qpair, struct spdk_event_handler_opts *opts)
756 {
757 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
758 
759 	return nvme_transport_qpair_get_fd(ctrlr, qpair, opts);
760 }
761 
/*
 * Poll a qpair for completions.
 *
 * In order, this routine:
 *  1. completes pending register operations (admin queue only),
 *  2. bails out with -ENXIO if the controller is failed or the qpair is not
 *     usable,
 *  3. completes error-injected requests whose delay has elapsed,
 *  4. asks the transport to process up to max_completions completions,
 *  5. frees the qpair if deletion was requested from within a completion
 *     callback, and
 *  6. resubmits queued requests / completes aborted queued requests.
 *
 * \param qpair qpair to poll.
 * \param max_completions passed through unchanged to the transport.
 *
 * \return the value returned by the transport (number of completions, or a
 * negative errno), except that -ENXIO from the transport is turned into 0
 * while the qpair is disconnecting.  -ENXIO is also returned directly when
 * the controller is failed or the qpair is not enabled.
 */
int32_t
spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	int32_t ret;
	struct nvme_request *req, *tmp;

	/* Complete any pending register operations */
	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_complete_register_operations(qpair);
	}

	/* A failed controller cannot be polled, unless the qpair is in the
	 * middle of disconnecting, in which case we fall through and let the
	 * transport make progress.
	 */
	if (spdk_unlikely(qpair->ctrlr->is_failed &&
			  nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTING)) {
		if (qpair->ctrlr->is_removed) {
			/* The controller is gone: mark the qpair as being destroyed and
			 * abort everything that is queued or outstanding on it.
			 */
			nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
			nvme_qpair_abort_all_queued_reqs(qpair);
			nvme_transport_qpair_abort_reqs(qpair);
		}
		return -ENXIO;
	}

	/* NOTE(review): nvme_qpair_check_enabled() is defined outside this chunk;
	 * it may do more than report state - confirm before reordering.
	 */
	if (spdk_unlikely(!nvme_qpair_check_enabled(qpair) &&
			  !(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING ||
			    nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING))) {
		/*
		 * qpair is not enabled, likely because a controller reset is
		 *  in progress.
		 */
		return -ENXIO;
	}

	/* error injection for those queued error requests */
	if (spdk_unlikely(!STAILQ_EMPTY(&qpair->err_req_head))) {
		STAILQ_FOREACH_SAFE(req, &qpair->err_req_head, stailq, tmp) {
			/* Only complete requests owned by this process, and only once
			 * more than timeout_tsc ticks have passed since submit_tick.
			 */
			if (req->pid == getpid() &&
			    spdk_get_ticks() - req->submit_tick > req->timeout_tsc) {
				STAILQ_REMOVE(&qpair->err_req_head, req, nvme_request, stailq);
				nvme_qpair_manual_complete_request(qpair, req,
								   req->cpl.status.sct,
								   req->cpl.status.sc, qpair->abort_dnr, true);
			}
		}
	}

	/* Flag that we are inside the completion path so that a qpair deletion
	 * requested from a callback can be deferred (see the
	 * delete_after_completion_context check below).
	 */
	qpair->in_completion_context = 1;
	ret = nvme_transport_qpair_process_completions(qpair, max_completions);
	if (ret < 0) {
		if (ret == -ENXIO && nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
			/* Not reported as an error while the qpair is disconnecting. */
			ret = 0;
		} else {
			NVME_CTRLR_ERRLOG(qpair->ctrlr, "CQ transport error %d (%s) on qpair id %hu\n",
					  ret, spdk_strerror(-ret), qpair->id);
			/* A transport error on the admin queue fails the whole controller. */
			if (nvme_qpair_is_admin_queue(qpair)) {
				nvme_ctrlr_fail(qpair->ctrlr, false);
			}
		}
	}
	qpair->in_completion_context = 0;
	if (qpair->delete_after_completion_context) {
		/*
		 * A request to delete this qpair was made in the context of this completion
		 *  routine - so it is safe to delete it now.
		 */
		spdk_nvme_ctrlr_free_io_qpair(qpair);
		/* qpair must not be touched after this point. */
		return ret;
	}

	/*
	 * At this point, ret must represent the number of completions we reaped.
	 * submit as many queued requests as we completed.
	 */
	if (ret > 0) {
		nvme_qpair_resubmit_requests(qpair, ret);
	} else {
		/* Nothing was reaped (or the transport failed): still complete any
		 * queued requests that have been marked as aborted.
		 */
		_nvme_qpair_complete_abort_queued_reqs(qpair);
	}

	return ret;
}
841 
842 spdk_nvme_qp_failure_reason
843 spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair)
844 {
845 	return qpair->transport_failure_reason;
846 }
847 
848 void
849 spdk_nvme_qpair_set_abort_dnr(struct spdk_nvme_qpair *qpair, bool dnr)
850 {
851 	qpair->abort_dnr = dnr ? 1 : 0;
852 }
853 
854 bool
855 spdk_nvme_qpair_is_connected(struct spdk_nvme_qpair *qpair)
856 {
857 	return nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED &&
858 	       nvme_qpair_get_state(qpair) <= NVME_QPAIR_ENABLED;
859 }
860 
861 int
862 nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
863 		struct spdk_nvme_ctrlr *ctrlr,
864 		enum spdk_nvme_qprio qprio,
865 		uint32_t num_requests, bool async)
866 {
867 	struct nvme_request *req;
868 	size_t req_size_padded;
869 	uint32_t i;
870 
871 	qpair->id = id;
872 	qpair->qprio = qprio;
873 
874 	qpair->in_completion_context = 0;
875 	qpair->delete_after_completion_context = 0;
876 	qpair->no_deletion_notification_needed = 0;
877 
878 	qpair->ctrlr = ctrlr;
879 	qpair->trtype = ctrlr->trid.trtype;
880 	qpair->is_new_qpair = true;
881 	qpair->async = async;
882 	qpair->poll_status = NULL;
883 	qpair->num_outstanding_reqs = 0;
884 
885 	STAILQ_INIT(&qpair->free_req);
886 	STAILQ_INIT(&qpair->queued_req);
887 	STAILQ_INIT(&qpair->aborting_queued_req);
888 	TAILQ_INIT(&qpair->err_cmd_head);
889 	STAILQ_INIT(&qpair->err_req_head);
890 
891 	req_size_padded = (sizeof(struct nvme_request) + 63) & ~(size_t)63;
892 
893 	/* Add one for the reserved_req */
894 	num_requests++;
895 
896 	qpair->req_buf = spdk_zmalloc(req_size_padded * num_requests, 64, NULL,
897 				      SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE);
898 	if (qpair->req_buf == NULL) {
899 		SPDK_ERRLOG("no memory to allocate qpair(cntlid:0x%x sqid:%d) req_buf with %d request\n",
900 			    ctrlr->cntlid, qpair->id, num_requests);
901 		return -ENOMEM;
902 	}
903 
904 	for (i = 0; i < num_requests; i++) {
905 		req = (void *)((uintptr_t)qpair->req_buf + i * req_size_padded);
906 
907 		req->qpair = qpair;
908 		if (i == 0) {
909 			qpair->reserved_req = req;
910 		} else {
911 			STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq);
912 		}
913 	}
914 
915 	return 0;
916 }
917 
918 void
919 nvme_qpair_complete_error_reqs(struct spdk_nvme_qpair *qpair)
920 {
921 	struct nvme_request		*req;
922 
923 	while (!STAILQ_EMPTY(&qpair->err_req_head)) {
924 		req = STAILQ_FIRST(&qpair->err_req_head);
925 		assert(req->pid == getpid());
926 		STAILQ_REMOVE_HEAD(&qpair->err_req_head, stailq);
927 		nvme_qpair_manual_complete_request(qpair, req,
928 						   req->cpl.status.sct,
929 						   req->cpl.status.sc, qpair->abort_dnr, true);
930 	}
931 }
932 
933 void
934 nvme_qpair_deinit(struct spdk_nvme_qpair *qpair)
935 {
936 	struct nvme_error_cmd *cmd, *entry;
937 
938 	nvme_qpair_abort_queued_reqs(qpair);
939 	_nvme_qpair_complete_abort_queued_reqs(qpair);
940 	nvme_qpair_complete_error_reqs(qpair);
941 
942 	TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) {
943 		TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link);
944 		spdk_free(cmd);
945 	}
946 
947 	spdk_free(qpair->req_buf);
948 }
949 
/*
 * Core submission path shared by nvme_qpair_submit_request() and
 * nvme_qpair_resubmit_request().
 *
 * Return value and ownership of req:
 *  - 0:        req was handed to the transport, parked on the error
 *              injection list, or is a split parent whose children are (at
 *              least partly) in flight.
 *  - -EAGAIN:  req could not be submitted right now and has NOT been
 *              released; the caller is expected to queue it.
 *  - other <0: req has been released here - either completed through
 *              nvme_qpair_manual_complete_request() (when req->queued is
 *              set) or cleaned up and freed.  The caller must not touch it.
 */
static inline int
_nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
{
	int			rc = 0;
	struct nvme_request	*child_req, *tmp;
	struct nvme_error_cmd	*cmd;
	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
	bool			child_req_failed = false;

	/* Return value intentionally ignored; the state is re-read below.
	 * NOTE(review): presumably this call may update the qpair state - confirm.
	 */
	nvme_qpair_check_enabled(qpair);

	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED ||
			  nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING ||
			  nvme_qpair_get_state(qpair) == NVME_QPAIR_DESTROYING)) {
		/* The qpair cannot accept I/O: release all children first, the
		 * parent itself is released at the error label.
		 */
		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
			nvme_request_remove_child(req, child_req);
			nvme_request_free_children(child_req);
			nvme_free_request(child_req);
		}

		rc = -ENXIO;
		goto error;
	}

	if (req->num_children) {
		/*
		 * This is a split (parent) request. Submit all of the children but not the parent
		 * request itself, since the parent is the original unsplit request.
		 */
		TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) {
			if (spdk_likely(!child_req_failed)) {
				/* rc keeps the error of the first child that fails. */
				rc = nvme_qpair_submit_request(qpair, child_req);
				if (spdk_unlikely(rc != 0)) {
					child_req_failed = true;
				}
			} else { /* free remaining child_reqs since one child_req fails */
				nvme_request_remove_child(req, child_req);
				nvme_request_free_children(child_req);
				nvme_free_request(child_req);
			}
		}

		if (spdk_unlikely(child_req_failed)) {
			/* part of children requests have been submitted,
			 * return success since we must wait for those children to complete,
			 * but set the parent request to failure.
			 */
			if (req->num_children) {
				req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
				req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
				return 0;
			}
			/* No child is left attached: fail the parent with rc. */
			goto error;
		}

		return rc;
	}

	/* queue those requests which matches with opcode in err_cmd list */
	if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head))) {
		TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) {
			/* Rules without do_not_submit are not handled in this function. */
			if (!cmd->do_not_submit) {
				continue;
			}

			if ((cmd->opc == req->cmd.opc) && cmd->err_count) {
				/* add to error request list and set cpl */
				req->timeout_tsc = cmd->timeout_tsc;
				req->submit_tick = spdk_get_ticks();
				req->cpl.status.sct = cmd->status.sct;
				req->cpl.status.sc = cmd->status.sc;
				STAILQ_INSERT_TAIL(&qpair->err_req_head, req, stailq);
				/* Each parked request consumes one injection. */
				cmd->err_count--;
				return 0;
			}
		}
	}

	if (spdk_unlikely(ctrlr->is_failed)) {
		rc = -ENXIO;
		goto error;
	}

	/* assign submit_tick before submitting req to specific transport */
	if (spdk_unlikely(ctrlr->timeout_enabled)) {
		if (req->submit_tick == 0) { /* req submitted for the first time */
			req->submit_tick = spdk_get_ticks();
			req->timed_out = false;
		}
	} else {
		req->submit_tick = 0;
	}

	/* Allow two cases:
	 * 1. NVMe qpair is enabled.
	 * 2. Always allow fabrics commands through - these get
	 * the controller out of reset state.
	 */
	if (spdk_likely(nvme_qpair_get_state(qpair) == NVME_QPAIR_ENABLED) ||
	    (req->cmd.opc == SPDK_NVME_OPC_FABRIC &&
	     nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
		rc = nvme_transport_qpair_submit_request(qpair, req);
	} else {
		/* The controller is being reset - queue this request and
		 *  submit it later when the reset is completed.
		 */
		return -EAGAIN;
	}

	if (spdk_likely(rc == 0)) {
		if (SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
			spdk_nvme_print_command(qpair->id, &req->cmd);
		}
		/* The request is now in flight, so it no longer counts as queued. */
		req->queued = false;
		return 0;
	}

	/* The transport asked for a retry: leave req untouched for the caller. */
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

error:
	/* Detach a failed child from its parent before releasing it. */
	if (req->parent != NULL) {
		nvme_request_remove_child(req->parent, req);
	}

	/* The request is from queued_req list we should trigger the callback from caller */
	if (spdk_unlikely(req->queued)) {
		if (rc == -ENXIO) {
			nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
							   SPDK_NVME_SC_ABORTED_SQ_DELETION,
							   qpair->abort_dnr, true);
		} else {
			nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC,
							   SPDK_NVME_SC_INTERNAL_DEVICE_ERROR,
							   true, true);
		}
		return rc;
	}

	/* Not a queued request: the error is reported through the return value,
	 * so the request is simply cleaned up and freed.
	 */
	nvme_cleanup_user_req(req);
	nvme_free_request(req);

	return rc;
}
1095 
1096 int
1097 nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1098 {
1099 	int rc;
1100 
1101 	if (spdk_unlikely(!STAILQ_EMPTY(&qpair->queued_req) && req->num_children == 0)) {
1102 		/*
1103 		 * Requests that have no children should be sent to the transport after all
1104 		 * currently queued requests. Requests with children will be split and go back
1105 		 * through this path.  We need to make an exception for the fabrics commands
1106 		 * while the qpair is connecting to be able to send the connect command
1107 		 * asynchronously.
1108 		 */
1109 		if (req->cmd.opc != SPDK_NVME_OPC_FABRIC ||
1110 		    nvme_qpair_get_state(qpair) != NVME_QPAIR_CONNECTING) {
1111 			STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
1112 			req->queued = true;
1113 			return 0;
1114 		}
1115 	}
1116 
1117 	rc = _nvme_qpair_submit_request(qpair, req);
1118 	if (rc == -EAGAIN) {
1119 		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
1120 		req->queued = true;
1121 		rc = 0;
1122 	}
1123 
1124 	return rc;
1125 }
1126 
1127 static int
1128 nvme_qpair_resubmit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1129 {
1130 	int rc;
1131 
1132 	/*
1133 	 * We should never have a request with children on the queue.
1134 	 * This is necessary to preserve the 1:1 relationship between
1135 	 * completions and resubmissions.
1136 	 */
1137 	assert(req->num_children == 0);
1138 	assert(req->queued);
1139 	rc = _nvme_qpair_submit_request(qpair, req);
1140 	if (spdk_unlikely(rc == -EAGAIN)) {
1141 		STAILQ_INSERT_HEAD(&qpair->queued_req, req, stailq);
1142 	}
1143 
1144 	return rc;
1145 }
1146 
1147 void
1148 nvme_qpair_abort_all_queued_reqs(struct spdk_nvme_qpair *qpair)
1149 {
1150 	nvme_qpair_complete_error_reqs(qpair);
1151 	nvme_qpair_abort_queued_reqs(qpair);
1152 	_nvme_qpair_complete_abort_queued_reqs(qpair);
1153 	if (nvme_qpair_is_admin_queue(qpair)) {
1154 		nvme_ctrlr_abort_queued_aborts(qpair->ctrlr);
1155 	}
1156 }
1157 
1158 int
1159 spdk_nvme_qpair_add_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
1160 					struct spdk_nvme_qpair *qpair,
1161 					uint8_t opc, bool do_not_submit,
1162 					uint64_t timeout_in_us,
1163 					uint32_t err_count,
1164 					uint8_t sct, uint8_t sc)
1165 {
1166 	struct nvme_error_cmd *entry, *cmd = NULL;
1167 	int rc = 0;
1168 
1169 	if (qpair == NULL) {
1170 		qpair = ctrlr->adminq;
1171 		nvme_ctrlr_lock(ctrlr);
1172 	}
1173 
1174 	TAILQ_FOREACH(entry, &qpair->err_cmd_head, link) {
1175 		if (entry->opc == opc) {
1176 			cmd = entry;
1177 			break;
1178 		}
1179 	}
1180 
1181 	if (cmd == NULL) {
1182 		cmd = spdk_zmalloc(sizeof(*cmd), 64, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
1183 		if (!cmd) {
1184 			rc = -ENOMEM;
1185 			goto out;
1186 		}
1187 		TAILQ_INSERT_TAIL(&qpair->err_cmd_head, cmd, link);
1188 	}
1189 
1190 	cmd->do_not_submit = do_not_submit;
1191 	cmd->err_count = err_count;
1192 	cmd->timeout_tsc = timeout_in_us * spdk_get_ticks_hz() / 1000000ULL;
1193 	cmd->opc = opc;
1194 	cmd->status.sct = sct;
1195 	cmd->status.sc = sc;
1196 out:
1197 	if (nvme_qpair_is_admin_queue(qpair)) {
1198 		nvme_ctrlr_unlock(ctrlr);
1199 	}
1200 
1201 	return rc;
1202 }
1203 
1204 void
1205 spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
1206 		struct spdk_nvme_qpair *qpair,
1207 		uint8_t opc)
1208 {
1209 	struct nvme_error_cmd *cmd, *entry;
1210 
1211 	if (qpair == NULL) {
1212 		qpair = ctrlr->adminq;
1213 		nvme_ctrlr_lock(ctrlr);
1214 	}
1215 
1216 	TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) {
1217 		if (cmd->opc == opc) {
1218 			TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link);
1219 			spdk_free(cmd);
1220 			break;
1221 		}
1222 	}
1223 
1224 	if (nvme_qpair_is_admin_queue(qpair)) {
1225 		nvme_ctrlr_unlock(ctrlr);
1226 	}
1227 }
1228 
1229 uint16_t
1230 spdk_nvme_qpair_get_id(struct spdk_nvme_qpair *qpair)
1231 {
1232 	return qpair->id;
1233 }
1234 
1235 uint32_t
1236 spdk_nvme_qpair_get_num_outstanding_reqs(struct spdk_nvme_qpair *qpair)
1237 {
1238 	return qpair->num_outstanding_reqs;
1239 }
1240