xref: /spdk/lib/nvmf/tcp.c (revision b8c964e2909a3c11af83ad01a2605f314c164c64)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse  *   Copyright (C) 2018 Intel Corporation. All rights reserved.
39727aa28SAlexey Marchuk  *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
4956fd5e1SAlexey Marchuk  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5e956be96SZiye Yang  */
6e956be96SZiye Yang 
7081f080aSBen Walker #include "spdk/accel.h"
8e956be96SZiye Yang #include "spdk/stdinc.h"
9e956be96SZiye Yang #include "spdk/crc32.h"
10e956be96SZiye Yang #include "spdk/endian.h"
11e956be96SZiye Yang #include "spdk/assert.h"
12e956be96SZiye Yang #include "spdk/thread.h"
13cc353f0eSBen Walker #include "spdk/nvmf_transport.h"
14e956be96SZiye Yang #include "spdk/string.h"
15e956be96SZiye Yang #include "spdk/trace.h"
16e956be96SZiye Yang #include "spdk/util.h"
17c1fbbfbeSAlexey Marchuk #include "spdk/log.h"
18ee164e62SKonrad Sztyber #include "spdk/keyring.h"
19e956be96SZiye Yang 
2034a0d851SShuhei Matsumoto #include "spdk_internal/assert.h"
21e956be96SZiye Yang #include "spdk_internal/nvme_tcp.h"
22c1fbbfbeSAlexey Marchuk #include "spdk_internal/sock.h"
23e956be96SZiye Yang 
24b8a87e6aSShuhei Matsumoto #include "nvmf_internal.h"
25c7d22538SKrzysztof Goreczny #include "transport.h"
26b8a87e6aSShuhei Matsumoto 
27c37e776eSKrzysztof Karas #include "spdk_internal/trace_defs.h"
28c37e776eSKrzysztof Karas 
29e956be96SZiye Yang #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
300903e14bSZiye Yang #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 16
31f766d1e4SDarek Stojaczyk #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
324fe47d6fSAlexey Marchuk #define SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM 32
33f766d1e4SDarek Stojaczyk #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
34e956be96SZiye Yang 
35bf887576SMengjinWu #define SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH 2
36bf887576SMengjinWu #define SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH 65535
37f1bec928SMengjinWu #define SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH 2
38f1bec928SMengjinWu #define SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH 4096
39bf887576SMengjinWu 
40bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH 128
41bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH 128
42bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
43bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
44bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
45bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
46bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
473b138377SJim Harris #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE UINT32_MAX
48bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
49bf887576SMengjinWu #define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1
50bf887576SMengjinWu 
51f038354eSSeth Howell const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp;
527662387cSKrzysztof Karas static bool g_tls_log = false;
53f038354eSSeth Howell 
54e956be96SZiye Yang /* spdk nvmf related structure */
55e956be96SZiye Yang enum spdk_nvmf_tcp_req_state {
56e956be96SZiye Yang 
57e956be96SZiye Yang 	/* The request is not currently in use */
58e956be96SZiye Yang 	TCP_REQUEST_STATE_FREE = 0,
59e956be96SZiye Yang 
60e956be96SZiye Yang 	/* Initial state when request first received */
6143f23e3dSKonrad Sztyber 	TCP_REQUEST_STATE_NEW = 1,
62e956be96SZiye Yang 
63e956be96SZiye Yang 	/* The request is queued until a data buffer is available. */
6443f23e3dSKonrad Sztyber 	TCP_REQUEST_STATE_NEED_BUFFER = 2,
65e956be96SZiye Yang 
66c7d22538SKrzysztof Goreczny 	/* The request has the data buffer available */
67c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_HAVE_BUFFER = 3,
68c7d22538SKrzysztof Goreczny 
69794d47d4SKonrad Sztyber 	/* The request is waiting for zcopy_start to finish */
70c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_AWAITING_ZCOPY_START = 4,
71794d47d4SKonrad Sztyber 
72794d47d4SKonrad Sztyber 	/* The request has received a zero-copy buffer */
73c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_ZCOPY_START_COMPLETED = 5,
74794d47d4SKonrad Sztyber 
75e956be96SZiye Yang 	/* The request is currently transferring data from the host to the controller. */
76c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER = 6,
77e956be96SZiye Yang 
7848a547fdSBen Walker 	/* The request is waiting for the R2T send acknowledgement. */
79c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_AWAITING_R2T_ACK = 7,
8048a547fdSBen Walker 
81e956be96SZiye Yang 	/* The request is ready to execute at the block device */
82c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_READY_TO_EXECUTE = 8,
83e956be96SZiye Yang 
84e956be96SZiye Yang 	/* The request is currently executing at the block device */
85c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_EXECUTING = 9,
86e956be96SZiye Yang 
873f912cf0SMichal Berger 	/* The request is waiting for zcopy buffers to be committed */
88c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT = 10,
89794d47d4SKonrad Sztyber 
90e956be96SZiye Yang 	/* The request finished executing at the block device */
91c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_EXECUTED = 11,
92e956be96SZiye Yang 
93e956be96SZiye Yang 	/* The request is ready to send a completion */
94c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_READY_TO_COMPLETE = 12,
95e956be96SZiye Yang 
96e956be96SZiye Yang 	/* The request is currently transferring final pdus from the controller to the host. */
97c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST = 13,
98e956be96SZiye Yang 
99794d47d4SKonrad Sztyber 	/* The request is waiting for zcopy buffers to be released (without committing) */
100c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE = 14,
101794d47d4SKonrad Sztyber 
102e956be96SZiye Yang 	/* The request completed and can be marked free. */
103c7d22538SKrzysztof Goreczny 	TCP_REQUEST_STATE_COMPLETED = 15,
104e956be96SZiye Yang 
105e956be96SZiye Yang 	/* Terminator */
106e956be96SZiye Yang 	TCP_REQUEST_NUM_STATES,
107e956be96SZiye Yang };
108e956be96SZiye Yang 
109b95aae63SKonrad Sztyber enum nvmf_tcp_qpair_state {
110b95aae63SKonrad Sztyber 	NVMF_TCP_QPAIR_STATE_INVALID = 0,
111b95aae63SKonrad Sztyber 	NVMF_TCP_QPAIR_STATE_INITIALIZING = 1,
112b95aae63SKonrad Sztyber 	NVMF_TCP_QPAIR_STATE_RUNNING = 2,
113b95aae63SKonrad Sztyber 	NVMF_TCP_QPAIR_STATE_EXITING = 3,
114b95aae63SKonrad Sztyber 	NVMF_TCP_QPAIR_STATE_EXITED = 4,
115b95aae63SKonrad Sztyber };
116b95aae63SKonrad Sztyber 
1172d07fa15SBen Walker static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
118e956be96SZiye Yang 	"Invalid PDU Header Field",
119e956be96SZiye Yang 	"PDU Sequence Error",
120e956be96SZiye Yang 	"Header Digiest Error",
121e956be96SZiye Yang 	"Data Transfer Out of Range",
122e956be96SZiye Yang 	"R2T Limit Exceeded",
123e956be96SZiye Yang 	"Unsupported parameter",
124e956be96SZiye Yang };
125e956be96SZiye Yang 
1260eae0106SJim Harris static void
1270eae0106SJim Harris nvmf_tcp_trace(void)
128e956be96SZiye Yang {
12926d44a12SJim Harris 	spdk_trace_register_owner_type(OWNER_TYPE_NVMF_TCP, 't');
130e956be96SZiye Yang 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
131617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_NEW",
132e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_NEW,
13326d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 1,
134b68ae4fbSAtul Malakar 					SPDK_TRACE_ARG_TYPE_INT, "qd");
135617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
136e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
13726d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
138ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
139c7d22538SKrzysztof Goreczny 	spdk_trace_register_description("TCP_REQ_HAVE_BUFFER",
140c7d22538SKrzysztof Goreczny 					TRACE_TCP_REQUEST_STATE_HAVE_BUFFER,
141c7d22538SKrzysztof Goreczny 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
142c7d22538SKrzysztof Goreczny 					SPDK_TRACE_ARG_TYPE_INT, "");
14345ded6b8SKonrad Sztyber 	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_START",
14445ded6b8SKonrad Sztyber 					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START,
14526d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
146ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
14745ded6b8SKonrad Sztyber 	spdk_trace_register_description("TCP_REQ_ZCPY_START_CPL",
14845ded6b8SKonrad Sztyber 					TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED,
14926d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
150ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
151617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
152e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
15326d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
154ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
155617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
156e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
15726d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
158ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
159617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_EXECUTING",
160e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_EXECUTING,
16126d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
162ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
16345ded6b8SKonrad Sztyber 	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_CMT",
16445ded6b8SKonrad Sztyber 					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT,
16526d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
166ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
167617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_EXECUTED",
168e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_EXECUTED,
16926d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
170ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
171617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
172e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
17326d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
174ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
175b6206d65SJim Harris 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
176e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
17726d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
178ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
17945ded6b8SKonrad Sztyber 	spdk_trace_register_description("TCP_REQ_AWAIT_ZCPY_RLS",
18045ded6b8SKonrad Sztyber 					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE,
18126d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
182ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
183617184beSJim Harris 	spdk_trace_register_description("TCP_REQ_COMPLETED",
184e956be96SZiye Yang 					TRACE_TCP_REQUEST_STATE_COMPLETED,
18526d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
186b68ae4fbSAtul Malakar 					SPDK_TRACE_ARG_TYPE_INT, "qd");
187b6206d65SJim Harris 	spdk_trace_register_description("TCP_READ_DONE",
188b92c3d41SJim Harris 					TRACE_TCP_READ_FROM_SOCKET_DONE,
18926d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
190ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
19148a547fdSBen Walker 	spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
19248a547fdSBen Walker 					TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
19326d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
194ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
1958107587bSKrzysztof Karas 
1968107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_CREATE", TRACE_TCP_QP_CREATE,
19726d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
19840cf86f2SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
1998107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_SOCK_INIT", TRACE_TCP_QP_SOCK_INIT,
20026d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
2018107587bSKrzysztof Karas 					SPDK_TRACE_ARG_TYPE_INT, "");
2028107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_STATE_CHANGE", TRACE_TCP_QP_STATE_CHANGE,
20326d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
2048107587bSKrzysztof Karas 					SPDK_TRACE_ARG_TYPE_INT, "state");
2058107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_DISCONNECT", TRACE_TCP_QP_DISCONNECT,
20626d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
2078107587bSKrzysztof Karas 					SPDK_TRACE_ARG_TYPE_INT, "");
2088107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_DESTROY", TRACE_TCP_QP_DESTROY,
20926d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
2108107587bSKrzysztof Karas 					SPDK_TRACE_ARG_TYPE_INT, "");
2118107587bSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_ABORT_REQ", TRACE_TCP_QP_ABORT_REQ,
21226d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
213ea1a6608SJim Harris 					SPDK_TRACE_ARG_TYPE_INT, "");
21498b1301fSKrzysztof Karas 	spdk_trace_register_description("TCP_QP_RCV_STATE_CHANGE", TRACE_TCP_QP_RCV_STATE_CHANGE,
21526d44a12SJim Harris 					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
2168107587bSKrzysztof Karas 					SPDK_TRACE_ARG_TYPE_INT, "state");
2175d95e315SKrzysztof Karas 
2185d95e315SKrzysztof Karas 	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_NVMF_TCP_IO, 1);
2195d95e315SKrzysztof Karas 	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_NVMF_TCP_IO, 0);
2209df4931bSJim Harris 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_QUEUE, OBJECT_NVMF_TCP_IO, 0);
2219df4931bSJim Harris 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_PEND, OBJECT_NVMF_TCP_IO, 0);
2229df4931bSJim Harris 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_COMPLETE, OBJECT_NVMF_TCP_IO, 0);
223e956be96SZiye Yang }
2240eae0106SJim Harris SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
225e956be96SZiye Yang 
226c57bafedSBen Walker struct spdk_nvmf_tcp_req  {
227e956be96SZiye Yang 	struct spdk_nvmf_request		req;
228e956be96SZiye Yang 	struct spdk_nvme_cpl			rsp;
229e956be96SZiye Yang 	struct spdk_nvme_cmd			cmd;
230e956be96SZiye Yang 
231a2adca79SBen Walker 	/* A PDU that can be used for sending responses. This is
232a2adca79SBen Walker 	 * not the incoming PDU! */
233a2adca79SBen Walker 	struct nvme_tcp_pdu			*pdu;
234a2adca79SBen Walker 
235a85057eaSAlexey Marchuk 	/* In-capsule data buffer */
236a85057eaSAlexey Marchuk 	uint8_t					*buf;
237c81c10c5SJim Harris 
238c81c10c5SJim Harris 	struct spdk_nvmf_tcp_req		*fused_pair;
239c81c10c5SJim Harris 
240a2adca79SBen Walker 	/*
241a2adca79SBen Walker 	 * The PDU for a request may be used multiple times in serial over
242a2adca79SBen Walker 	 * the request's lifetime. For example, first to send an R2T, then
243a2adca79SBen Walker 	 * to send a completion. To catch mistakes where the PDU is used
244a2adca79SBen Walker 	 * twice at the same time, add a debug flag here for init/fini.
245a2adca79SBen Walker 	 */
246a2adca79SBen Walker 	bool					pdu_in_use;
247cc6920a4SJosh Soref 	bool					has_in_capsule_data;
248c81c10c5SJim Harris 	bool					fused_failed;
249e956be96SZiye Yang 
250e956be96SZiye Yang 	/* transfer_tag */
251e956be96SZiye Yang 	uint16_t				ttag;
252e956be96SZiye Yang 
25357efada5SZiye Yang 	enum spdk_nvmf_tcp_req_state		state;
25457efada5SZiye Yang 
255e956be96SZiye Yang 	/*
256fdfb7908SBen Walker 	 * h2c_offset is used when we receive the h2c_data PDU.
257e956be96SZiye Yang 	 */
258fdfb7908SBen Walker 	uint32_t				h2c_offset;
259e956be96SZiye Yang 
2602bc819ddSShuhei Matsumoto 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
261c57bafedSBen Walker 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
262c7d22538SKrzysztof Goreczny 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		control_msg_link;
263e956be96SZiye Yang };
264e956be96SZiye Yang 
2652b59852bSBen Walker struct spdk_nvmf_tcp_qpair {
266e956be96SZiye Yang 	struct spdk_nvmf_qpair			qpair;
26763de221bSBen Walker 	struct spdk_nvmf_tcp_poll_group		*group;
268e956be96SZiye Yang 	struct spdk_sock			*sock;
269e956be96SZiye Yang 
270e956be96SZiye Yang 	enum nvme_tcp_pdu_recv_state		recv_state;
271b95aae63SKonrad Sztyber 	enum nvmf_tcp_qpair_state		state;
272e956be96SZiye Yang 
273a2adca79SBen Walker 	/* PDU being actively received */
274f8ac678eSZiye Yang 	struct nvme_tcp_pdu			*pdu_in_progress;
2754dba5072SBen Walker 
276c81c10c5SJim Harris 	struct spdk_nvmf_tcp_req		*fused_first;
277c81c10c5SJim Harris 
278e956be96SZiye Yang 	/* Queues to track the requests in all states */
27960af3c00SRui Chang 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		tcp_req_working_queue;
28060af3c00SRui Chang 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		tcp_req_free_queue;
2817fc2c085SMengjinWu 	SLIST_HEAD(, nvme_tcp_pdu)		tcp_pdu_free_queue;
28278df9be4SBen Walker 	/* Number of working pdus */
28378df9be4SBen Walker 	uint32_t				tcp_pdu_working_count;
28460af3c00SRui Chang 
285e956be96SZiye Yang 	/* Number of requests in each state */
286444cf90cSBen Walker 	uint32_t				state_cntr[TCP_REQUEST_NUM_STATES];
287e956be96SZiye Yang 
288e956be96SZiye Yang 	uint8_t					cpda;
289e956be96SZiye Yang 
290e956be96SZiye Yang 	bool					host_hdgst_enable;
291e956be96SZiye Yang 	bool					host_ddgst_enable;
292e956be96SZiye Yang 
293*b8c964e2SJim Harris 	bool					await_req_msg_pending;
294*b8c964e2SJim Harris 
295f600ca4cSAlexey Marchuk 	/* This is a spare PDU used for sending special management
296f600ca4cSAlexey Marchuk 	 * operations. Primarily, this is used for the initial
297f600ca4cSAlexey Marchuk 	 * connection response and c2h termination request. */
298f600ca4cSAlexey Marchuk 	struct nvme_tcp_pdu			*mgmt_pdu;
299f600ca4cSAlexey Marchuk 
300f600ca4cSAlexey Marchuk 	/* Arrays of in-capsule buffers, requests, and pdus.
301f600ca4cSAlexey Marchuk 	 * Each array is 'resource_count' number of elements */
302f600ca4cSAlexey Marchuk 	void					*bufs;
303f600ca4cSAlexey Marchuk 	struct spdk_nvmf_tcp_req		*reqs;
304f600ca4cSAlexey Marchuk 	struct nvme_tcp_pdu			*pdus;
305f600ca4cSAlexey Marchuk 	uint32_t				resource_count;
306f600ca4cSAlexey Marchuk 	uint32_t				recv_buf_size;
307f600ca4cSAlexey Marchuk 
308f600ca4cSAlexey Marchuk 	struct spdk_nvmf_tcp_port		*port;
309f600ca4cSAlexey Marchuk 
310e956be96SZiye Yang 	/* IP address */
311e956be96SZiye Yang 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
312e956be96SZiye Yang 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
313e956be96SZiye Yang 
314e956be96SZiye Yang 	/* IP port */
315e956be96SZiye Yang 	uint16_t				initiator_port;
316e956be96SZiye Yang 	uint16_t				target_port;
317e956be96SZiye Yang 
318b7e1f79cSKonrad Sztyber 	/* Wait until the host terminates the connection (e.g. after sending C2HTermReq) */
319b7e1f79cSKonrad Sztyber 	bool					wait_terminate;
320b7e1f79cSKonrad Sztyber 
3212d0ce5b4SZiye Yang 	/* Timer used to destroy qpair after detecting transport error issue if initiator does
3222d0ce5b4SZiye Yang 	 *  not close the connection.
3232d0ce5b4SZiye Yang 	 */
3242d0ce5b4SZiye Yang 	struct spdk_poller			*timeout_poller;
3252d0ce5b4SZiye Yang 
3263056c8acSKonrad Sztyber 	spdk_nvmf_transport_qpair_fini_cb	fini_cb_fn;
3273056c8acSKonrad Sztyber 	void					*fini_cb_arg;
328d478edd4SZiye Yang 
3292b59852bSBen Walker 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
330a2ae7b96SKrzysztof Goreczny 	bool					pending_flush;
331e956be96SZiye Yang };
332e956be96SZiye Yang 
33385fa4324SAlexey Marchuk struct spdk_nvmf_tcp_control_msg {
33485fa4324SAlexey Marchuk 	STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link;
33585fa4324SAlexey Marchuk };
33685fa4324SAlexey Marchuk 
33785fa4324SAlexey Marchuk struct spdk_nvmf_tcp_control_msg_list {
33885fa4324SAlexey Marchuk 	void *msg_buf;
33985fa4324SAlexey Marchuk 	STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs;
340c7d22538SKrzysztof Goreczny 	STAILQ_HEAD(, spdk_nvmf_tcp_req) waiting_for_msg_reqs;
34185fa4324SAlexey Marchuk };
34285fa4324SAlexey Marchuk 
343e956be96SZiye Yang struct spdk_nvmf_tcp_poll_group {
344e956be96SZiye Yang 	struct spdk_nvmf_transport_poll_group	group;
345e956be96SZiye Yang 	struct spdk_sock_group			*sock_group;
34663de221bSBen Walker 
3472b59852bSBen Walker 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
34885fa4324SAlexey Marchuk 
349f0956d33SZiye Yang 	struct spdk_io_channel			*accel_channel;
35085fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_control_msg_list	*control_msg_list;
351d619f6c2STomasz Zawadzki 
352d619f6c2STomasz Zawadzki 	TAILQ_ENTRY(spdk_nvmf_tcp_poll_group)	link;
353e956be96SZiye Yang };
354e956be96SZiye Yang 
355e956be96SZiye Yang struct spdk_nvmf_tcp_port {
3566d8f1fc6SJacek Kalwas 	const struct spdk_nvme_transport_id	*trid;
357e956be96SZiye Yang 	struct spdk_sock			*listen_sock;
358cff94374SKrzysztof Goreczny 	struct spdk_nvmf_transport		*transport;
359e956be96SZiye Yang 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
360e956be96SZiye Yang };
361e956be96SZiye Yang 
362f766d1e4SDarek Stojaczyk struct tcp_transport_opts {
363f766d1e4SDarek Stojaczyk 	bool		c2h_success;
3644fe47d6fSAlexey Marchuk 	uint16_t	control_msg_num;
365f766d1e4SDarek Stojaczyk 	uint32_t	sock_priority;
366f766d1e4SDarek Stojaczyk };
367f766d1e4SDarek Stojaczyk 
368663243cbSKrzysztof Karas struct tcp_psk_entry {
369663243cbSKrzysztof Karas 	char				hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
370663243cbSKrzysztof Karas 	char				subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
371599a8884SKonrad Sztyber 	char				pskid[NVMF_PSK_IDENTITY_LEN];
372663243cbSKrzysztof Karas 	uint8_t				psk[SPDK_TLS_PSK_MAX_LEN];
373ee164e62SKonrad Sztyber 	struct spdk_key			*key;
3747a50a6bcSKrzysztof Karas 	uint32_t			psk_size;
375169ee6c3SKrzysztof Karas 	enum nvme_tcp_cipher_suite	tls_cipher_suite;
376663243cbSKrzysztof Karas 	TAILQ_ENTRY(tcp_psk_entry)	link;
377663243cbSKrzysztof Karas };
378663243cbSKrzysztof Karas 
379e956be96SZiye Yang struct spdk_nvmf_tcp_transport {
380e956be96SZiye Yang 	struct spdk_nvmf_transport		transport;
381f766d1e4SDarek Stojaczyk 	struct tcp_transport_opts               tcp_opts;
38279606beeSKonrad Sztyber 	uint32_t				ack_timeout;
383e956be96SZiye Yang 
384d619f6c2STomasz Zawadzki 	struct spdk_nvmf_tcp_poll_group		*next_pg;
385d619f6c2STomasz Zawadzki 
38643022da3SJacek Kalwas 	struct spdk_poller			*accept_poller;
3870e983c56SKrzysztof Goreczny 	struct spdk_sock_group			*listen_sock_group;
388e956be96SZiye Yang 
389e956be96SZiye Yang 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
390d619f6c2STomasz Zawadzki 	TAILQ_HEAD(, spdk_nvmf_tcp_poll_group)	poll_groups;
391663243cbSKrzysztof Karas 
392663243cbSKrzysztof Karas 	TAILQ_HEAD(, tcp_psk_entry)		psks;
393e956be96SZiye Yang };
394e956be96SZiye Yang 
395f766d1e4SDarek Stojaczyk static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = {
396f766d1e4SDarek Stojaczyk 	{
397f766d1e4SDarek Stojaczyk 		"c2h_success", offsetof(struct tcp_transport_opts, c2h_success),
398f766d1e4SDarek Stojaczyk 		spdk_json_decode_bool, true
399f766d1e4SDarek Stojaczyk 	},
400f766d1e4SDarek Stojaczyk 	{
4014fe47d6fSAlexey Marchuk 		"control_msg_num", offsetof(struct tcp_transport_opts, control_msg_num),
4024fe47d6fSAlexey Marchuk 		spdk_json_decode_uint16, true
4034fe47d6fSAlexey Marchuk 	},
4044fe47d6fSAlexey Marchuk 	{
405f766d1e4SDarek Stojaczyk 		"sock_priority", offsetof(struct tcp_transport_opts, sock_priority),
406f766d1e4SDarek Stojaczyk 		spdk_json_decode_uint32, true
407f766d1e4SDarek Stojaczyk 	},
408f766d1e4SDarek Stojaczyk };
409f766d1e4SDarek Stojaczyk 
4104de405abSSeth Howell static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
411c57bafedSBen Walker 				 struct spdk_nvmf_tcp_req *tcp_req);
41285fa4324SAlexey Marchuk static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
413e956be96SZiye Yang 
4143e5ea7ffSAlexey Marchuk static void _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
4153e5ea7ffSAlexey Marchuk 				    struct spdk_nvmf_tcp_req *tcp_req);
416*b8c964e2SJim Harris static void nvmf_tcp_qpair_process(struct spdk_nvmf_tcp_qpair *tqpair);
4173e5ea7ffSAlexey Marchuk 
4184c33c7aeSMengjinWu static inline void
4194de405abSSeth Howell nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
420e956be96SZiye Yang 		       enum spdk_nvmf_tcp_req_state state)
421e956be96SZiye Yang {
422e956be96SZiye Yang 	struct spdk_nvmf_qpair *qpair;
4232b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair *tqpair;
424e956be96SZiye Yang 
425e956be96SZiye Yang 	qpair = tcp_req->req.qpair;
4262b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
427e956be96SZiye Yang 
428444cf90cSBen Walker 	assert(tqpair->state_cntr[tcp_req->state] > 0);
429e956be96SZiye Yang 	tqpair->state_cntr[tcp_req->state]--;
430e956be96SZiye Yang 	tqpair->state_cntr[state]++;
431e956be96SZiye Yang 
432e956be96SZiye Yang 	tcp_req->state = state;
433e956be96SZiye Yang }
434e956be96SZiye Yang 
435a2adca79SBen Walker static inline struct nvme_tcp_pdu *
436a2adca79SBen Walker nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req)
437e956be96SZiye Yang {
438a2adca79SBen Walker 	assert(tcp_req->pdu_in_use == false);
439e956be96SZiye Yang 
440a2adca79SBen Walker 	memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu));
441a2adca79SBen Walker 	tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
442a2adca79SBen Walker 
443a2adca79SBen Walker 	return tcp_req->pdu;
444e956be96SZiye Yang }
445e956be96SZiye Yang 
446c57bafedSBen Walker static struct spdk_nvmf_tcp_req *
4474de405abSSeth Howell nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
448e956be96SZiye Yang {
449c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req;
450e956be96SZiye Yang 
45160af3c00SRui Chang 	tcp_req = TAILQ_FIRST(&tqpair->tcp_req_free_queue);
4527fc2c085SMengjinWu 	if (spdk_unlikely(!tcp_req)) {
453e956be96SZiye Yang 		return NULL;
454e956be96SZiye Yang 	}
455e956be96SZiye Yang 
456e956be96SZiye Yang 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
457fdfb7908SBen Walker 	tcp_req->h2c_offset = 0;
458cc6920a4SJosh Soref 	tcp_req->has_in_capsule_data = false;
45915ae31fbSBen Walker 	tcp_req->req.dif_enabled = false;
4606631c2a8SKonrad Sztyber 	tcp_req->req.zcopy_phase = NVMF_ZCOPY_PHASE_NONE;
461e956be96SZiye Yang 
46260af3c00SRui Chang 	TAILQ_REMOVE(&tqpair->tcp_req_free_queue, tcp_req, state_link);
46360af3c00SRui Chang 	TAILQ_INSERT_TAIL(&tqpair->tcp_req_working_queue, tcp_req, state_link);
464b68ae4fbSAtul Malakar 	tqpair->qpair.queue_depth++;
4654de405abSSeth Howell 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
466e956be96SZiye Yang 	return tcp_req;
467e956be96SZiye Yang }
468e956be96SZiye Yang 
469*b8c964e2SJim Harris static void
470*b8c964e2SJim Harris handle_await_req(void *arg)
471*b8c964e2SJim Harris {
472*b8c964e2SJim Harris 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
473*b8c964e2SJim Harris 
474*b8c964e2SJim Harris 	tqpair->await_req_msg_pending = false;
475*b8c964e2SJim Harris 	if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
476*b8c964e2SJim Harris 		nvmf_tcp_qpair_process(tqpair);
477*b8c964e2SJim Harris 	}
478*b8c964e2SJim Harris }
479*b8c964e2SJim Harris 
48060af3c00SRui Chang static inline void
48160af3c00SRui Chang nvmf_tcp_req_put(struct spdk_nvmf_tcp_qpair *tqpair, struct spdk_nvmf_tcp_req *tcp_req)
48260af3c00SRui Chang {
48375169d0dSKonrad Sztyber 	assert(!tcp_req->pdu_in_use);
48475169d0dSKonrad Sztyber 
48560af3c00SRui Chang 	TAILQ_REMOVE(&tqpair->tcp_req_working_queue, tcp_req, state_link);
48660af3c00SRui Chang 	TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link);
487b68ae4fbSAtul Malakar 	tqpair->qpair.queue_depth--;
48860af3c00SRui Chang 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
489*b8c964e2SJim Harris 	if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ &&
490*b8c964e2SJim Harris 	    !tqpair->await_req_msg_pending) {
491*b8c964e2SJim Harris 		tqpair->await_req_msg_pending = true;
492*b8c964e2SJim Harris 		spdk_thread_send_msg(spdk_get_thread(), handle_await_req, tqpair);
493*b8c964e2SJim Harris 	}
49460af3c00SRui Chang }
49560af3c00SRui Chang 
496e956be96SZiye Yang static void
497c7d22538SKrzysztof Goreczny nvmf_tcp_req_get_buffers_done(struct spdk_nvmf_request *req)
498c7d22538SKrzysztof Goreczny {
499c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_req *tcp_req;
500c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_transport *transport;
501c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_transport *ttransport;
502c7d22538SKrzysztof Goreczny 
503c7d22538SKrzysztof Goreczny 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
504c7d22538SKrzysztof Goreczny 	transport = req->qpair->transport;
505c7d22538SKrzysztof Goreczny 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
506c7d22538SKrzysztof Goreczny 
507c7d22538SKrzysztof Goreczny 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER);
508c7d22538SKrzysztof Goreczny 	nvmf_tcp_req_process(ttransport, tcp_req);
509c7d22538SKrzysztof Goreczny }
510c7d22538SKrzysztof Goreczny 
511c7d22538SKrzysztof Goreczny static void
512304fde15SZiye Yang nvmf_tcp_request_free(void *cb_arg)
513e956be96SZiye Yang {
514e956be96SZiye Yang 	struct spdk_nvmf_tcp_transport *ttransport;
515304fde15SZiye Yang 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
516e956be96SZiye Yang 
51763a60a0cSBen Walker 	assert(tcp_req != NULL);
518e956be96SZiye Yang 
5192172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "tcp_req=%p will be freed\n", tcp_req);
520e956be96SZiye Yang 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
521e956be96SZiye Yang 				      struct spdk_nvmf_tcp_transport, transport);
5224de405abSSeth Howell 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
5234de405abSSeth Howell 	nvmf_tcp_req_process(ttransport, tcp_req);
524e956be96SZiye Yang }
525e956be96SZiye Yang 
526e956be96SZiye Yang static int
5274de405abSSeth Howell nvmf_tcp_req_free(struct spdk_nvmf_request *req)
528e956be96SZiye Yang {
529c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
530e956be96SZiye Yang 
531e956be96SZiye Yang 	nvmf_tcp_request_free(tcp_req);
532e956be96SZiye Yang 
533e956be96SZiye Yang 	return 0;
534e956be96SZiye Yang }
535e956be96SZiye Yang 
536e956be96SZiye Yang static void
5374de405abSSeth Howell nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
538e956be96SZiye Yang 			   enum spdk_nvmf_tcp_req_state state)
539e956be96SZiye Yang {
540c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
541e956be96SZiye Yang 
54260af3c00SRui Chang 	assert(state != TCP_REQUEST_STATE_FREE);
54360af3c00SRui Chang 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) {
54460af3c00SRui Chang 		if (state == tcp_req->state) {
545e956be96SZiye Yang 			nvmf_tcp_request_free(tcp_req);
546e956be96SZiye Yang 		}
547e956be96SZiye Yang 	}
54860af3c00SRui Chang }
549e956be96SZiye Yang 
550c7d22538SKrzysztof Goreczny static inline void
551c7d22538SKrzysztof Goreczny nvmf_tcp_request_get_buffers_abort(struct spdk_nvmf_tcp_req *tcp_req)
552c7d22538SKrzysztof Goreczny {
553c7d22538SKrzysztof Goreczny 	/* Request can wait either for the iobuf or control_msg */
554c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_poll_group *group = tcp_req->req.qpair->group;
555c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_transport *transport = tcp_req->req.qpair->transport;
556c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_transport_poll_group *tgroup = nvmf_get_transport_poll_group(group, transport);
557c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_poll_group *tcp_group = SPDK_CONTAINEROF(tgroup,
558c7d22538SKrzysztof Goreczny 			struct spdk_nvmf_tcp_poll_group, group);
559c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_req *tmp_req, *abort_req;
560c7d22538SKrzysztof Goreczny 
561c7d22538SKrzysztof Goreczny 	assert(tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER);
562c7d22538SKrzysztof Goreczny 
563c7d22538SKrzysztof Goreczny 	STAILQ_FOREACH_SAFE(abort_req, &tcp_group->control_msg_list->waiting_for_msg_reqs, control_msg_link,
564c7d22538SKrzysztof Goreczny 			    tmp_req) {
565c7d22538SKrzysztof Goreczny 		if (abort_req == tcp_req) {
566c7d22538SKrzysztof Goreczny 			STAILQ_REMOVE(&tcp_group->control_msg_list->waiting_for_msg_reqs, abort_req, spdk_nvmf_tcp_req,
567c7d22538SKrzysztof Goreczny 				      control_msg_link);
568c7d22538SKrzysztof Goreczny 			return;
569c7d22538SKrzysztof Goreczny 		}
570c7d22538SKrzysztof Goreczny 	}
571c7d22538SKrzysztof Goreczny 
572c7d22538SKrzysztof Goreczny 	if (!nvmf_request_get_buffers_abort(&tcp_req->req)) {
573c7d22538SKrzysztof Goreczny 		SPDK_ERRLOG("Failed to abort tcp_req=%p\n", tcp_req);
574c7d22538SKrzysztof Goreczny 		assert(0 && "Should never happen");
575c7d22538SKrzysztof Goreczny 	}
576c7d22538SKrzysztof Goreczny }
577c7d22538SKrzysztof Goreczny 
578e956be96SZiye Yang static void
5794de405abSSeth Howell nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
580e956be96SZiye Yang {
581c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
582e956be96SZiye Yang 
5834de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
5844de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
585e956be96SZiye Yang 
586c7d22538SKrzysztof Goreczny 	/* Wipe the requests waiting for buffer from the waiting list */
58760af3c00SRui Chang 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) {
58860af3c00SRui Chang 		if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
589c7d22538SKrzysztof Goreczny 			nvmf_tcp_request_get_buffers_abort(tcp_req);
590e956be96SZiye Yang 		}
59160af3c00SRui Chang 	}
592e956be96SZiye Yang 
5934de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
5944de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
5954de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
5964de405abSSeth Howell 	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
597e956be96SZiye Yang }
598e956be96SZiye Yang 
599e956be96SZiye Yang static void
6002b59852bSBen Walker nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
601b62a1f9eSZiye Yang {
602b62a1f9eSZiye Yang 	int i;
603c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req;
604b62a1f9eSZiye Yang 
605b62a1f9eSZiye Yang 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
606b62a1f9eSZiye Yang 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
607444cf90cSBen Walker 		SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]);
60860af3c00SRui Chang 		TAILQ_FOREACH(tcp_req, &tqpair->tcp_req_working_queue, state_link) {
60960af3c00SRui Chang 			if ((int)tcp_req->state == i) {
610005b053aSShuhei Matsumoto 				SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
611b62a1f9eSZiye Yang 				SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
612b62a1f9eSZiye Yang 			}
613b62a1f9eSZiye Yang 		}
614b62a1f9eSZiye Yang 	}
61560af3c00SRui Chang }
616b62a1f9eSZiye Yang 
/* Final teardown of a TCP qpair: closes the socket, drains every outstanding
 * request, frees all per-qpair resources and then invokes the fini callback
 * captured at scheduling time.  Runs from a thread message, never directly
 * from a sock callback (see nvmf_tcp_qpair_destroy()). */
static void
_nvmf_tcp_qpair_destroy(void *_tqpair)
{
	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
	/* Capture the callback before freeing tqpair below. */
	spdk_nvmf_transport_qpair_fini_cb cb_fn = tqpair->fini_cb_fn;
	void *cb_arg = tqpair->fini_cb_arg;
	int err = 0;

	spdk_trace_record(TRACE_TCP_QP_DESTROY, tqpair->qpair.trace_id, 0, 0);

	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");

	/* Close the socket first so no further sock callbacks can fire while
	 * the remaining requests are drained. */
	err = spdk_sock_close(&tqpair->sock);
	assert(err == 0);
	nvmf_tcp_cleanup_all_states(tqpair);

	/* After cleanup every request must be back on the FREE list; anything
	 * else indicates a leaked or stuck request, so dump the queues. */
	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) {
		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
			    tqpair->resource_count);
		err++;
	}

	if (err > 0) {
		nvmf_tcp_dump_qpair_req_contents(tqpair);
	}

	/* The timeout poller might still be registered here if we close the qpair before host
	 * terminates the connection.
	 */
	spdk_poller_unregister(&tqpair->timeout_poller);
	spdk_dma_free(tqpair->pdus);
	free(tqpair->reqs);
	spdk_free(tqpair->bufs);
	spdk_trace_unregister_owner(tqpair->qpair.trace_id);
	free(tqpair);

	if (cb_fn != NULL) {
		cb_fn(cb_arg);
	}

	SPDK_DEBUGLOG(nvmf_tcp, "Leave\n");
}
660e956be96SZiye Yang 
661f766d1e4SDarek Stojaczyk static void
6623056c8acSKonrad Sztyber nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
6633056c8acSKonrad Sztyber {
6643056c8acSKonrad Sztyber 	/* Delay the destruction to make sure it isn't performed from the context of a sock
6653056c8acSKonrad Sztyber 	 * callback.  Otherwise, spdk_sock_close() might not abort pending requests, causing their
6663056c8acSKonrad Sztyber 	 * completions to be executed after the qpair is freed.  (Note: this fixed issue #2471.)
6673056c8acSKonrad Sztyber 	 */
6683056c8acSKonrad Sztyber 	spdk_thread_send_msg(spdk_get_thread(), _nvmf_tcp_qpair_destroy, tqpair);
6693056c8acSKonrad Sztyber }
6703056c8acSKonrad Sztyber 
6713056c8acSKonrad Sztyber static void
672f766d1e4SDarek Stojaczyk nvmf_tcp_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w)
673f766d1e4SDarek Stojaczyk {
674f766d1e4SDarek Stojaczyk 	struct spdk_nvmf_tcp_transport	*ttransport;
675f766d1e4SDarek Stojaczyk 	assert(w != NULL);
676f766d1e4SDarek Stojaczyk 
677f766d1e4SDarek Stojaczyk 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
678f766d1e4SDarek Stojaczyk 	spdk_json_write_named_bool(w, "c2h_success", ttransport->tcp_opts.c2h_success);
679f766d1e4SDarek Stojaczyk 	spdk_json_write_named_uint32(w, "sock_priority", ttransport->tcp_opts.sock_priority);
680f766d1e4SDarek Stojaczyk }
681f766d1e4SDarek Stojaczyk 
68243939e67SKonrad Sztyber static void
68343939e67SKonrad Sztyber nvmf_tcp_free_psk_entry(struct tcp_psk_entry *entry)
68443939e67SKonrad Sztyber {
68543939e67SKonrad Sztyber 	if (entry == NULL) {
68643939e67SKonrad Sztyber 		return;
68743939e67SKonrad Sztyber 	}
68843939e67SKonrad Sztyber 
68943939e67SKonrad Sztyber 	spdk_memset_s(entry->psk, sizeof(entry->psk), 0, sizeof(entry->psk));
690ee164e62SKonrad Sztyber 	spdk_keyring_put_key(entry->key);
69143939e67SKonrad Sztyber 	free(entry);
69243939e67SKonrad Sztyber }
69343939e67SKonrad Sztyber 
694e816c8fdSSeth Howell static int
6950d98a949SNaresh Gottumukkala nvmf_tcp_destroy(struct spdk_nvmf_transport *transport,
6960d98a949SNaresh Gottumukkala 		 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg)
697e816c8fdSSeth Howell {
698e816c8fdSSeth Howell 	struct spdk_nvmf_tcp_transport	*ttransport;
699663243cbSKrzysztof Karas 	struct tcp_psk_entry *entry, *tmp;
700e816c8fdSSeth Howell 
701e816c8fdSSeth Howell 	assert(transport != NULL);
702e816c8fdSSeth Howell 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
703e816c8fdSSeth Howell 
704663243cbSKrzysztof Karas 	TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) {
705663243cbSKrzysztof Karas 		TAILQ_REMOVE(&ttransport->psks, entry, link);
70643939e67SKonrad Sztyber 		nvmf_tcp_free_psk_entry(entry);
707663243cbSKrzysztof Karas 	}
708663243cbSKrzysztof Karas 
70943022da3SJacek Kalwas 	spdk_poller_unregister(&ttransport->accept_poller);
71081fc34dfSKrzysztof Goreczny 	spdk_sock_group_unregister_interrupt(ttransport->listen_sock_group);
7110e983c56SKrzysztof Goreczny 	spdk_sock_group_close(&ttransport->listen_sock_group);
712e816c8fdSSeth Howell 	free(ttransport);
7130d98a949SNaresh Gottumukkala 
7140d98a949SNaresh Gottumukkala 	if (cb_fn) {
7150d98a949SNaresh Gottumukkala 		cb_fn(cb_arg);
7160d98a949SNaresh Gottumukkala 	}
717e816c8fdSSeth Howell 	return 0;
718e816c8fdSSeth Howell }
719e816c8fdSSeth Howell 
7208dd1cd21SBen Walker static int nvmf_tcp_accept(void *ctx);
72143022da3SJacek Kalwas 
7220e983c56SKrzysztof Goreczny static void nvmf_tcp_accept_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock);
7230e983c56SKrzysztof Goreczny 
724e956be96SZiye Yang static struct spdk_nvmf_transport *
7254de405abSSeth Howell nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
726e956be96SZiye Yang {
727e956be96SZiye Yang 	struct spdk_nvmf_tcp_transport *ttransport;
728e956be96SZiye Yang 	uint32_t sge_count;
729e816c8fdSSeth Howell 	uint32_t min_shared_buffers;
73081fc34dfSKrzysztof Goreczny 	int rc;
73181fc34dfSKrzysztof Goreczny 	uint64_t period;
732e956be96SZiye Yang 
733e956be96SZiye Yang 	ttransport = calloc(1, sizeof(*ttransport));
734e956be96SZiye Yang 	if (!ttransport) {
735e956be96SZiye Yang 		return NULL;
736e956be96SZiye Yang 	}
737e956be96SZiye Yang 
738e956be96SZiye Yang 	TAILQ_INIT(&ttransport->ports);
739d619f6c2STomasz Zawadzki 	TAILQ_INIT(&ttransport->poll_groups);
740663243cbSKrzysztof Karas 	TAILQ_INIT(&ttransport->psks);
741e956be96SZiye Yang 
742e956be96SZiye Yang 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
743e956be96SZiye Yang 
744f766d1e4SDarek Stojaczyk 	ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
745f766d1e4SDarek Stojaczyk 	ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
7464fe47d6fSAlexey Marchuk 	ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
747f766d1e4SDarek Stojaczyk 	if (opts->transport_specific != NULL &&
748f766d1e4SDarek Stojaczyk 	    spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder,
749f766d1e4SDarek Stojaczyk 					    SPDK_COUNTOF(tcp_transport_opts_decoder),
750f766d1e4SDarek Stojaczyk 					    &ttransport->tcp_opts)) {
751f766d1e4SDarek Stojaczyk 		SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n");
752f766d1e4SDarek Stojaczyk 		free(ttransport);
753f766d1e4SDarek Stojaczyk 		return NULL;
754f766d1e4SDarek Stojaczyk 	}
755f766d1e4SDarek Stojaczyk 
756e956be96SZiye Yang 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
757e956be96SZiye Yang 
7582172c432STomasz Zawadzki 	SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n"
75904d09f92SZiye Yang 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
7601551197dSAlexey Marchuk 		     "  max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
76158f16244SZiye Yang 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
762aa322721SShuhei Matsumoto 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
76326e0ef9aSShuhei Matsumoto 		     "  dif_insert_or_strip=%d, sock_priority=%d\n"
76479606beeSKonrad Sztyber 		     "  abort_timeout_sec=%d, control_msg_num=%hu\n"
76579606beeSKonrad Sztyber 		     "  ack_timeout=%d\n",
76604d09f92SZiye Yang 		     opts->max_queue_depth,
76704d09f92SZiye Yang 		     opts->max_io_size,
7681551197dSAlexey Marchuk 		     opts->max_qpairs_per_ctrlr - 1,
76904d09f92SZiye Yang 		     opts->io_unit_size,
77004d09f92SZiye Yang 		     opts->in_capsule_data_size,
77158f16244SZiye Yang 		     opts->max_aq_depth,
7726629202cSOr Gerlitz 		     opts->num_shared_buffers,
773f766d1e4SDarek Stojaczyk 		     ttransport->tcp_opts.c2h_success,
7746ad6a113SZiye Yang 		     opts->dif_insert_or_strip,
775f766d1e4SDarek Stojaczyk 		     ttransport->tcp_opts.sock_priority,
7764fe47d6fSAlexey Marchuk 		     opts->abort_timeout_sec,
77779606beeSKonrad Sztyber 		     ttransport->tcp_opts.control_msg_num,
77879606beeSKonrad Sztyber 		     opts->ack_timeout);
7796ad6a113SZiye Yang 
780f766d1e4SDarek Stojaczyk 	if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
7816ad6a113SZiye Yang 		SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n"
7826ad6a113SZiye Yang 			    "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n",
783f766d1e4SDarek Stojaczyk 			    ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
7846ad6a113SZiye Yang 		free(ttransport);
7856ad6a113SZiye Yang 		return NULL;
7866ad6a113SZiye Yang 	}
787e956be96SZiye Yang 
7884fe47d6fSAlexey Marchuk 	if (ttransport->tcp_opts.control_msg_num == 0 &&
7894fe47d6fSAlexey Marchuk 	    opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
7904fe47d6fSAlexey Marchuk 		SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. Using default value %u\n",
7914fe47d6fSAlexey Marchuk 			     SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM);
7924fe47d6fSAlexey Marchuk 		ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
7934fe47d6fSAlexey Marchuk 	}
7944fe47d6fSAlexey Marchuk 
795e956be96SZiye Yang 	/* I/O unit size cannot be larger than max I/O size */
79604d09f92SZiye Yang 	if (opts->io_unit_size > opts->max_io_size) {
797100c5371SMengjinWu 		SPDK_WARNLOG("TCP param io_unit_size %u can't be larger than max_io_size %u. Using max_io_size as io_unit_size\n",
798100c5371SMengjinWu 			     opts->io_unit_size, opts->max_io_size);
79904d09f92SZiye Yang 		opts->io_unit_size = opts->max_io_size;
800e956be96SZiye Yang 	}
801e956be96SZiye Yang 
802100c5371SMengjinWu 	/* In capsule data size cannot be larger than max I/O size */
803100c5371SMengjinWu 	if (opts->in_capsule_data_size > opts->max_io_size) {
804100c5371SMengjinWu 		SPDK_WARNLOG("TCP param ICD size %u can't be larger than max_io_size %u. Using max_io_size as ICD size\n",
805100c5371SMengjinWu 			     opts->io_unit_size, opts->max_io_size);
806100c5371SMengjinWu 		opts->in_capsule_data_size = opts->max_io_size;
807100c5371SMengjinWu 	}
808100c5371SMengjinWu 
809bf887576SMengjinWu 	/* max IO queue depth cannot be smaller than 2 or larger than 65535.
810bf887576SMengjinWu 	 * We will not check SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, because max_queue_depth is 16bits and always not larger than 64k. */
811bf887576SMengjinWu 	if (opts->max_queue_depth < SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH) {
812bf887576SMengjinWu 		SPDK_WARNLOG("TCP param max_queue_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
813bf887576SMengjinWu 			     opts->max_queue_depth, SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH,
814bf887576SMengjinWu 			     SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH);
815bf887576SMengjinWu 		opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
816bf887576SMengjinWu 	}
817bf887576SMengjinWu 
818f1bec928SMengjinWu 	/* max admin queue depth cannot be smaller than 2 or larger than 4096 */
819f1bec928SMengjinWu 	if (opts->max_aq_depth < SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH ||
820f1bec928SMengjinWu 	    opts->max_aq_depth > SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH) {
821f1bec928SMengjinWu 		SPDK_WARNLOG("TCP param max_aq_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
822f1bec928SMengjinWu 			     opts->max_aq_depth, SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH,
823f1bec928SMengjinWu 			     SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH);
824f1bec928SMengjinWu 		opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
825f1bec928SMengjinWu 	}
826f1bec928SMengjinWu 
82704d09f92SZiye Yang 	sge_count = opts->max_io_size / opts->io_unit_size;
828e956be96SZiye Yang 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
82904d09f92SZiye Yang 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
830e956be96SZiye Yang 		free(ttransport);
831e956be96SZiye Yang 		return NULL;
832e956be96SZiye Yang 	}
833e956be96SZiye Yang 
8343b138377SJim Harris 	/* If buf_cache_size == UINT32_MAX, we will dynamically pick a cache size later that we know will fit. */
8353b138377SJim Harris 	if (opts->buf_cache_size < UINT32_MAX) {
836e9b9510aSAlexey Marchuk 		min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size;
837e816c8fdSSeth Howell 		if (min_shared_buffers > opts->num_shared_buffers) {
838e816c8fdSSeth Howell 			SPDK_ERRLOG("There are not enough buffers to satisfy "
839e816c8fdSSeth Howell 				    "per-poll group caches for each thread. (%" PRIu32 ") "
840e816c8fdSSeth Howell 				    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
841e816c8fdSSeth Howell 			SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
842c6b9113bSKonrad Sztyber 			free(ttransport);
843e816c8fdSSeth Howell 			return NULL;
844e816c8fdSSeth Howell 		}
8453b138377SJim Harris 	}
846e816c8fdSSeth Howell 
84781fc34dfSKrzysztof Goreczny 	period = spdk_interrupt_mode_is_enabled() ? 0 : opts->acceptor_poll_rate;
84881fc34dfSKrzysztof Goreczny 	ttransport->accept_poller = SPDK_POLLER_REGISTER(nvmf_tcp_accept, &ttransport->transport, period);
84943022da3SJacek Kalwas 	if (!ttransport->accept_poller) {
85043022da3SJacek Kalwas 		free(ttransport);
85143022da3SJacek Kalwas 		return NULL;
85243022da3SJacek Kalwas 	}
85343022da3SJacek Kalwas 
8545c782a70SJim Harris 	spdk_poller_register_interrupt(ttransport->accept_poller, NULL, NULL);
85581fc34dfSKrzysztof Goreczny 
8560e983c56SKrzysztof Goreczny 	ttransport->listen_sock_group = spdk_sock_group_create(NULL);
8570e983c56SKrzysztof Goreczny 	if (ttransport->listen_sock_group == NULL) {
8580e983c56SKrzysztof Goreczny 		SPDK_ERRLOG("Failed to create socket group for listen sockets\n");
8590e983c56SKrzysztof Goreczny 		spdk_poller_unregister(&ttransport->accept_poller);
8600e983c56SKrzysztof Goreczny 		free(ttransport);
8610e983c56SKrzysztof Goreczny 		return NULL;
8620e983c56SKrzysztof Goreczny 	}
8630e983c56SKrzysztof Goreczny 
86481fc34dfSKrzysztof Goreczny 	if (spdk_interrupt_mode_is_enabled()) {
86581fc34dfSKrzysztof Goreczny 		rc = SPDK_SOCK_GROUP_REGISTER_INTERRUPT(ttransport->listen_sock_group,
86681fc34dfSKrzysztof Goreczny 							SPDK_INTERRUPT_EVENT_IN | SPDK_INTERRUPT_EVENT_OUT, nvmf_tcp_accept, &ttransport->transport);
86781fc34dfSKrzysztof Goreczny 		if (rc != 0) {
86881fc34dfSKrzysztof Goreczny 			SPDK_ERRLOG("Failed to register interrupt for listen socker sock group\n");
86981fc34dfSKrzysztof Goreczny 			spdk_sock_group_close(&ttransport->listen_sock_group);
87081fc34dfSKrzysztof Goreczny 			spdk_poller_unregister(&ttransport->accept_poller);
87181fc34dfSKrzysztof Goreczny 			free(ttransport);
87281fc34dfSKrzysztof Goreczny 			return NULL;
87381fc34dfSKrzysztof Goreczny 		}
87481fc34dfSKrzysztof Goreczny 	}
87581fc34dfSKrzysztof Goreczny 
876e956be96SZiye Yang 	return &ttransport->transport;
877e956be96SZiye Yang }
878e956be96SZiye Yang 
/* Parse a transport service id string as a decimal TCP port number.
 * Returns the port in [1, 65535], or -1 when the string is not a valid
 * all-digits number in that range. */
static int
nvmf_tcp_trsvcid_to_int(const char *trsvcid)
{
	char *endptr = NULL;
	unsigned long long value;

	value = strtoull(trsvcid, &endptr, 10);
	/* Reject empty input and trailing non-digit characters. */
	if (endptr == NULL || endptr == trsvcid || *endptr != '\0') {
		return -1;
	}

	/* Valid TCP/IP port numbers are in [1, 65535]. */
	if (value < 1 || value > 65535) {
		return -1;
	}

	return (int)value;
}
897e956be96SZiye Yang 
898e956be96SZiye Yang /**
899e956be96SZiye Yang  * Canonicalize a listen address trid.
900e956be96SZiye Yang  */
901e956be96SZiye Yang static int
90261d85773SSeth Howell nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
903e956be96SZiye Yang 			   const struct spdk_nvme_transport_id *trid)
904e956be96SZiye Yang {
905e956be96SZiye Yang 	int trsvcid_int;
906e956be96SZiye Yang 
90761d85773SSeth Howell 	trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
908e956be96SZiye Yang 	if (trsvcid_int < 0) {
909e956be96SZiye Yang 		return -EINVAL;
910e956be96SZiye Yang 	}
911e956be96SZiye Yang 
912e956be96SZiye Yang 	memset(canon_trid, 0, sizeof(*canon_trid));
9137ed0904bSSeth Howell 	spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP);
914e956be96SZiye Yang 	canon_trid->adrfam = trid->adrfam;
915e956be96SZiye Yang 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
916e956be96SZiye Yang 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
917e956be96SZiye Yang 
918e956be96SZiye Yang 	return 0;
919e956be96SZiye Yang }
920e956be96SZiye Yang 
921e956be96SZiye Yang /**
922e956be96SZiye Yang  * Find an existing listening port.
923e956be96SZiye Yang  */
924e956be96SZiye Yang static struct spdk_nvmf_tcp_port *
92561d85773SSeth Howell nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
926e956be96SZiye Yang 		   const struct spdk_nvme_transport_id *trid)
927e956be96SZiye Yang {
928e956be96SZiye Yang 	struct spdk_nvme_transport_id canon_trid;
929e956be96SZiye Yang 	struct spdk_nvmf_tcp_port *port;
930e956be96SZiye Yang 
93161d85773SSeth Howell 	if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
932e956be96SZiye Yang 		return NULL;
933e956be96SZiye Yang 	}
934e956be96SZiye Yang 
935e956be96SZiye Yang 	TAILQ_FOREACH(port, &ttransport->ports, link) {
9366d8f1fc6SJacek Kalwas 		if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) {
937e956be96SZiye Yang 			return port;
938e956be96SZiye Yang 		}
939e956be96SZiye Yang 	}
940e956be96SZiye Yang 
941e956be96SZiye Yang 	return NULL;
942e956be96SZiye Yang }
943e956be96SZiye Yang 
944e956be96SZiye Yang static int
945599a8884SKonrad Sztyber tcp_sock_get_key(uint8_t *out, int out_len, const char **cipher, const char *pskid,
946aabfea70SKrzysztof Karas 		 void *get_key_ctx)
947aabfea70SKrzysztof Karas {
948aabfea70SKrzysztof Karas 	struct tcp_psk_entry *entry;
949aabfea70SKrzysztof Karas 	struct spdk_nvmf_tcp_transport *ttransport = get_key_ctx;
950aabfea70SKrzysztof Karas 	size_t psk_len;
951e036416bSKrzysztof Karas 	int rc;
952aabfea70SKrzysztof Karas 
953aabfea70SKrzysztof Karas 	TAILQ_FOREACH(entry, &ttransport->psks, link) {
954599a8884SKonrad Sztyber 		if (strcmp(pskid, entry->pskid) != 0) {
955aabfea70SKrzysztof Karas 			continue;
956aabfea70SKrzysztof Karas 		}
957aabfea70SKrzysztof Karas 
9587a50a6bcSKrzysztof Karas 		psk_len = entry->psk_size;
9597a50a6bcSKrzysztof Karas 		if ((size_t)out_len < psk_len) {
960aabfea70SKrzysztof Karas 			SPDK_ERRLOG("Out buffer of size: %" PRIu32 " cannot fit PSK of len: %lu\n",
961aabfea70SKrzysztof Karas 				    out_len, psk_len);
962aabfea70SKrzysztof Karas 			return -ENOBUFS;
963aabfea70SKrzysztof Karas 		}
964e036416bSKrzysztof Karas 
965e036416bSKrzysztof Karas 		/* Convert PSK to the TLS PSK format. */
966599a8884SKonrad Sztyber 		rc = nvme_tcp_derive_tls_psk(entry->psk, psk_len, pskid, out, out_len,
967169ee6c3SKrzysztof Karas 					     entry->tls_cipher_suite);
968e036416bSKrzysztof Karas 		if (rc < 0) {
969e036416bSKrzysztof Karas 			SPDK_ERRLOG("Could not generate TLS PSK\n");
970e036416bSKrzysztof Karas 		}
971e036416bSKrzysztof Karas 
972169ee6c3SKrzysztof Karas 		switch (entry->tls_cipher_suite) {
973169ee6c3SKrzysztof Karas 		case NVME_TCP_CIPHER_AES_128_GCM_SHA256:
974169ee6c3SKrzysztof Karas 			*cipher = "TLS_AES_128_GCM_SHA256";
975169ee6c3SKrzysztof Karas 			break;
976169ee6c3SKrzysztof Karas 		case NVME_TCP_CIPHER_AES_256_GCM_SHA384:
977169ee6c3SKrzysztof Karas 			*cipher = "TLS_AES_256_GCM_SHA384";
978169ee6c3SKrzysztof Karas 			break;
979169ee6c3SKrzysztof Karas 		default:
980169ee6c3SKrzysztof Karas 			*cipher = NULL;
981169ee6c3SKrzysztof Karas 			return -ENOTSUP;
982169ee6c3SKrzysztof Karas 		}
983169ee6c3SKrzysztof Karas 
984e036416bSKrzysztof Karas 		return rc;
985aabfea70SKrzysztof Karas 	}
986aabfea70SKrzysztof Karas 
987599a8884SKonrad Sztyber 	SPDK_ERRLOG("Could not find PSK for identity: %s\n", pskid);
988aabfea70SKrzysztof Karas 
989aabfea70SKrzysztof Karas 	return -EINVAL;
990aabfea70SKrzysztof Karas }
991aabfea70SKrzysztof Karas 
/* Start listening on the address described by @trid.
 *
 * Validates the service id, creates the listen socket (switching to the "ssl"
 * sock implementation with TLS 1.3 + PSK when a secure channel is requested),
 * verifies the resolved address family matches the trid, registers the socket
 * with the transport's listen sock group, and records the port on
 * ttransport->ports.  Returns 0 on success or a negative errno on failure.
 */
static int
nvmf_tcp_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid,
		struct spdk_nvmf_listen_opts *listen_opts)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_port *port;
	int trsvcid_int;
	uint8_t adrfam;
	const char *sock_impl_name;
	struct spdk_sock_impl_opts impl_opts;
	size_t impl_opts_size = sizeof(impl_opts);
	struct spdk_sock_opts opts;
	int rc;

	if (!strlen(trid->trsvcid)) {
		SPDK_ERRLOG("Service id is required\n");
		return -EINVAL;
	}

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
	if (trsvcid_int < 0) {
		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
		return -EINVAL;
	}

	port = calloc(1, sizeof(*port));
	if (!port) {
		SPDK_ERRLOG("Port allocation failed\n");
		return -ENOMEM;
	}

	/* The port only borrows @trid; presumably the caller guarantees it
	 * outlives the port — confirm against the listener lifecycle. */
	port->trid = trid;

	sock_impl_name = NULL;

	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ttransport->tcp_opts.sock_priority;
	opts.ack_timeout = transport->opts.ack_timeout;
	if (listen_opts->secure_channel) {
		/* A secure channel implies TLS, which only the "ssl" impl provides;
		 * any other explicit sock_impl is a configuration error. */
		if (listen_opts->sock_impl &&
		    strncmp("ssl", listen_opts->sock_impl, strlen(listen_opts->sock_impl))) {
			SPDK_ERRLOG("Enabling secure_channel while specifying a sock_impl different from 'ssl' is unsupported");
			free(port);
			return -EINVAL;
		}
		listen_opts->sock_impl = "ssl";
	}

	if (listen_opts->sock_impl) {
		sock_impl_name = listen_opts->sock_impl;
		spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);

		if (!strncmp("ssl", sock_impl_name, strlen(sock_impl_name))) {
			if (!g_tls_log) {
				SPDK_NOTICELOG("TLS support is considered experimental\n");
				g_tls_log = true;
			}
			/* Force TLS 1.3 and resolve PSKs through this transport's table. */
			impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
			impl_opts.get_key = tcp_sock_get_key;
			impl_opts.get_key_ctx = ttransport;
			impl_opts.tls_cipher_suites = "TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256";
		}

		opts.impl_opts = &impl_opts;
		opts.impl_opts_size = sizeof(impl_opts);
	}

	port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int,
			    sock_impl_name, &opts);
	if (port->listen_sock == NULL) {
		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
			    trid->traddr, trsvcid_int,
			    spdk_strerror(errno), errno);
		free(port);
		return -errno;
	}

	if (spdk_sock_is_ipv4(port->listen_sock)) {
		adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
		adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else {
		SPDK_ERRLOG("Unhandled socket type\n");
		adrfam = 0;
	}

	/* The socket's actual family must agree with what the trid advertised. */
	if (adrfam != trid->adrfam) {
		SPDK_ERRLOG("Socket address family mismatch\n");
		spdk_sock_close(&port->listen_sock);
		free(port);
		return -EINVAL;
	}

	/* Accept handling for all listen sockets is driven through the shared
	 * listen sock group (see nvmf_tcp_accept_cb). */
	rc = spdk_sock_group_add_sock(ttransport->listen_sock_group, port->listen_sock, nvmf_tcp_accept_cb,
				      port);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to add socket to the listen socket group\n");
		spdk_sock_close(&port->listen_sock);
		free(port);
		/* NOTE(review): rc holds the failure code here; returning -errno
		 * assumes the sock layer also set errno — verify. */
		return -errno;
	}

	port->transport = transport;

	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n",
		       trid->traddr, trid->trsvcid);

	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
	return 0;
}
1105e956be96SZiye Yang 
11066d8f1fc6SJacek Kalwas static void
11074de405abSSeth Howell nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
1108e956be96SZiye Yang 		     const struct spdk_nvme_transport_id *trid)
1109e956be96SZiye Yang {
1110e956be96SZiye Yang 	struct spdk_nvmf_tcp_transport *ttransport;
1111e956be96SZiye Yang 	struct spdk_nvmf_tcp_port *port;
1112e956be96SZiye Yang 
1113e956be96SZiye Yang 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1114e956be96SZiye Yang 
11152172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "Removing listen address %s port %s\n",
1116e956be96SZiye Yang 		      trid->traddr, trid->trsvcid);
1117e956be96SZiye Yang 
111861d85773SSeth Howell 	port = nvmf_tcp_find_port(ttransport, trid);
1119e956be96SZiye Yang 	if (port) {
11200e983c56SKrzysztof Goreczny 		spdk_sock_group_remove_sock(ttransport->listen_sock_group, port->listen_sock);
1121e956be96SZiye Yang 		TAILQ_REMOVE(&ttransport->ports, port, link);
1122e956be96SZiye Yang 		spdk_sock_close(&port->listen_sock);
1123e956be96SZiye Yang 		free(port);
1124e956be96SZiye Yang 	}
1125e956be96SZiye Yang }
1126e956be96SZiye Yang 
11274de405abSSeth Howell static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1128053fa66bSBen Walker 		enum nvme_tcp_pdu_recv_state state);
1129053fa66bSBen Walker 
1130e956be96SZiye Yang static void
1131b95aae63SKonrad Sztyber nvmf_tcp_qpair_set_state(struct spdk_nvmf_tcp_qpair *tqpair, enum nvmf_tcp_qpair_state state)
1132219ddb57SKrzysztof Karas {
1133219ddb57SKrzysztof Karas 	tqpair->state = state;
1134f906b912SKonrad Sztyber 	spdk_trace_record(TRACE_TCP_QP_STATE_CHANGE, tqpair->qpair.trace_id, 0, 0,
1135f906b912SKonrad Sztyber 			  (uint64_t)tqpair->state);
1136219ddb57SKrzysztof Karas }
1137219ddb57SKrzysztof Karas 
/* Begin an orderly teardown of a qpair.  Safe to call more than once: only
 * while the qpair is still at most RUNNING does it advance the state to
 * EXITING and kick off the generic disconnect path. */
static void
nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
{
	SPDK_DEBUGLOG(nvmf_tcp, "Disconnecting qpair %p\n", tqpair);

	spdk_trace_record(TRACE_TCP_QP_DISCONNECT, tqpair->qpair.trace_id, 0, 0);

	if (tqpair->state <= NVMF_TCP_QPAIR_STATE_RUNNING) {
		nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_EXITING);
		/* The receive path must already have reached the ERROR state
		 * before a disconnect is initiated. */
		assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
		spdk_poller_unregister(&tqpair->timeout_poller);

		/* This will end up calling nvmf_tcp_close_qpair */
		spdk_nvmf_qpair_disconnect(&tqpair->qpair);
	}
}
115478a11548SBen Walker 
115578a11548SBen Walker static void
1156c676c081SKonrad Sztyber _mgmt_pdu_write_done(void *_tqpair, int err)
11575d497f6cSBen Walker {
1158c676c081SKonrad Sztyber 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
1159c676c081SKonrad Sztyber 	struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu;
11605d497f6cSBen Walker 
1161c676c081SKonrad Sztyber 	if (spdk_unlikely(err != 0)) {
116278df9be4SBen Walker 		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
11635d497f6cSBen Walker 		return;
11645d497f6cSBen Walker 	}
11655d497f6cSBen Walker 
11665d497f6cSBen Walker 	assert(pdu->cb_fn != NULL);
1167a2adca79SBen Walker 	pdu->cb_fn(pdu->cb_arg);
11685d497f6cSBen Walker }
11695d497f6cSBen Walker 
/* Socket write completion for a request-owned PDU.  Releases the PDU, frees
 * the request if it reached COMPLETED while the write was still in flight,
 * and otherwise either quiesces the qpair (on error) or runs the PDU's
 * transfer-complete callback. */
static void
_req_pdu_write_done(void *req, int err)
{
	struct spdk_nvmf_tcp_req *tcp_req = req;
	struct nvme_tcp_pdu *pdu = tcp_req->pdu;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	/* Mark the PDU reusable before any of the paths below run. */
	assert(tcp_req->pdu_in_use);
	tcp_req->pdu_in_use = false;

	/* If the request is in a completed state, we're waiting for write completion to free it */
	if (spdk_unlikely(tcp_req->state == TCP_REQUEST_STATE_COMPLETED)) {
		nvmf_tcp_request_free(tcp_req);
		return;
	}

	if (spdk_unlikely(err != 0)) {
		/* The write failed; stop receiving so the qpair can be torn down. */
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}
1194c676c081SKonrad Sztyber 
1195c676c081SKonrad Sztyber static void
1196c676c081SKonrad Sztyber _pdu_write_done(struct nvme_tcp_pdu *pdu, int err)
1197c676c081SKonrad Sztyber {
1198c676c081SKonrad Sztyber 	pdu->sock_req.cb_fn(pdu->sock_req.cb_arg, err);
1199c676c081SKonrad Sztyber }
1200c676c081SKonrad Sztyber 
/* Thread-message handler that flushes data queued on the qpair's socket.
 * Scheduled from _tcp_write_pdu only in interrupt mode, where no poller
 * drains the async write queue.  On EAGAIN the flush is re-queued to this
 * thread (pending_flush stays set); any other failure is logged and the
 * pending flag is cleared. */
static void
tcp_sock_flush_cb(void *arg)
{
	struct spdk_nvmf_tcp_qpair *tqpair = arg;
	int rc = spdk_sock_flush(tqpair->sock);

	if (rc < 0 && errno == EAGAIN) {
		/* Socket not writable yet - retry on a later message-loop iteration. */
		spdk_thread_send_msg(spdk_get_thread(), tcp_sock_flush_cb, tqpair);
		return;
	}

	tqpair->pending_flush = false;
	if (rc < 0) {
		SPDK_ERRLOG("Could not write to socket: rc=%d, errno=%d\n", rc, errno);
	}
}
1217a2ae7b96SKrzysztof Goreczny 
/* Queue a PDU for asynchronous transmission on the qpair's socket.
 * IC_RESP and C2H_TERM_REQ PDUs are additionally flushed synchronously so
 * they reach the wire right away; for all other PDU types in interrupt
 * mode, a deferred flush is scheduled via tcp_sock_flush_cb. */
static void
_tcp_write_pdu(struct nvme_tcp_pdu *pdu)
{
	int rc;
	uint32_t mapped_length;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	/* Build the iovec list (header plus optional digests/data) to send. */
	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
			       tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, &mapped_length);
	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);

	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
	    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
		/* Try to force the send immediately. */
		rc = spdk_sock_flush(tqpair->sock);
		if (rc > 0 && (uint32_t)rc == mapped_length) {
			/* The whole PDU went out; complete it now. */
			_pdu_write_done(pdu, 0);
		} else {
			SPDK_ERRLOG("Could not write %s to socket: rc=%d, errno=%d\n",
				    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ?
				    "IC_RESP" : "TERM_REQ", rc, errno);
			_pdu_write_done(pdu, rc >= 0 ? -EAGAIN : -errno);
		}
	} else if (spdk_interrupt_mode_is_enabled()) {
		/* Async writes must be flushed */
		if (!tqpair->pending_flush) {
			tqpair->pending_flush = true;
			spdk_thread_send_msg(spdk_get_thread(), tcp_sock_flush_cb, tqpair);
		}
	}
}
1249e956be96SZiye Yang 
1250d478edd4SZiye Yang static void
1251be57e2a6SZiye Yang data_crc32_accel_done(void *cb_arg, int status)
1252d478edd4SZiye Yang {
1253d478edd4SZiye Yang 	struct nvme_tcp_pdu *pdu = cb_arg;
1254d478edd4SZiye Yang 
1255d478edd4SZiye Yang 	if (spdk_unlikely(status)) {
1256be57e2a6SZiye Yang 		SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu);
1257d478edd4SZiye Yang 		_pdu_write_done(pdu, status);
1258d478edd4SZiye Yang 		return;
1259d478edd4SZiye Yang 	}
1260d478edd4SZiye Yang 
1261be57e2a6SZiye Yang 	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
1262be57e2a6SZiye Yang 	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
1263be57e2a6SZiye Yang 
1264d478edd4SZiye Yang 	_tcp_write_pdu(pdu);
1265d478edd4SZiye Yang }
1266d478edd4SZiye Yang 
/* Compute the PDU data digest (CRC32C) when the peer negotiated data
 * digests, then hand the PDU to _tcp_write_pdu.  Where possible the CRC is
 * offloaded to the accel framework; otherwise it is computed inline. */
static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
	int rc = 0;

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
		/* Only support this limited case for the first step: no DIF context,
		 * digest-aligned length, and a poll group (for its accel channel). */
		if (spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
				&& tqpair->group)) {
			/* Offload; data_crc32_accel_done finishes the write asynchronously. */
			rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
						       pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			if (spdk_likely(rc == 0)) {
				return;
			}
		} else {
			pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
		}
		/* Inline path, or accel submission failure (rc != 0) reported here. */
		data_crc32_accel_done(pdu, rc);
	} else {
		/* No data digest required - write the PDU directly. */
		_tcp_write_pdu(pdu);
	}
}
1291be57e2a6SZiye Yang 
/* Common PDU transmit path: record the transfer-complete callback, point
 * the first iovec at the PDU header, append the header digest if
 * negotiated, then continue with data-digest computation and the socket
 * write.  The sock_req callback must have been armed by the caller
 * (nvmf_tcp_qpair_write_mgmt_pdu / nvmf_tcp_qpair_write_req_pdu). */
static void
nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	/* The PDU currently being received must never be the one transmitted. */
	assert(tqpair->pdu_in_progress != pdu);

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;

	pdu->iov[0].iov_base = &pdu->hdr.raw;
	pdu->iov[0].iov_len = hlen;

	/* Header Digest - written immediately after the header bytes. */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	/* Data Digest */
	pdu_data_crc32_compute(pdu);
}
1319d478edd4SZiye Yang 
1320c676c081SKonrad Sztyber static void
1321c676c081SKonrad Sztyber nvmf_tcp_qpair_write_mgmt_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1322c676c081SKonrad Sztyber 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
1323c676c081SKonrad Sztyber 			      void *cb_arg)
1324c676c081SKonrad Sztyber {
1325c676c081SKonrad Sztyber 	struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu;
1326c676c081SKonrad Sztyber 
1327c676c081SKonrad Sztyber 	pdu->sock_req.cb_fn = _mgmt_pdu_write_done;
1328c676c081SKonrad Sztyber 	pdu->sock_req.cb_arg = tqpair;
1329c676c081SKonrad Sztyber 
1330c676c081SKonrad Sztyber 	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
1331c676c081SKonrad Sztyber }
1332c676c081SKonrad Sztyber 
1333c676c081SKonrad Sztyber static void
1334c676c081SKonrad Sztyber nvmf_tcp_qpair_write_req_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1335c676c081SKonrad Sztyber 			     struct spdk_nvmf_tcp_req *tcp_req,
1336c676c081SKonrad Sztyber 			     nvme_tcp_qpair_xfer_complete_cb cb_fn,
1337c676c081SKonrad Sztyber 			     void *cb_arg)
1338c676c081SKonrad Sztyber {
1339c676c081SKonrad Sztyber 	struct nvme_tcp_pdu *pdu = tcp_req->pdu;
1340c676c081SKonrad Sztyber 
1341c676c081SKonrad Sztyber 	pdu->sock_req.cb_fn = _req_pdu_write_done;
1342c676c081SKonrad Sztyber 	pdu->sock_req.cb_arg = tcp_req;
1343c676c081SKonrad Sztyber 
134475169d0dSKonrad Sztyber 	assert(!tcp_req->pdu_in_use);
134575169d0dSKonrad Sztyber 	tcp_req->pdu_in_use = true;
134675169d0dSKonrad Sztyber 
1347c676c081SKonrad Sztyber 	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
1348c676c081SKonrad Sztyber }
1349c676c081SKonrad Sztyber 
/* Allocate the per-qpair pools sized by the transport opts: request
 * objects, optional in-capsule data buffers, and a PDU pool laid out as
 * (2 * max_queue_depth + 1) entries - one PDU bound to each request, one
 * per request on the receive free list, plus the qpair's mgmt PDU at the
 * end.  Returns 0 on success, -1 on allocation failure; on failure the
 * partially-allocated members remain set on tqpair (presumably reclaimed
 * by nvmf_tcp_qpair_destroy - not shown here). */
static int
nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
{
	uint32_t i;
	struct spdk_nvmf_transport_opts *opts;
	uint32_t in_capsule_data_size;

	opts = &tqpair->qpair.transport->opts;

	in_capsule_data_size = opts->in_capsule_data_size;
	if (opts->dif_insert_or_strip) {
		/* Room for per-block metadata when DIF insert/strip is enabled. */
		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
	}

	tqpair->resource_count = opts->max_queue_depth;

	tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
	if (!tqpair->reqs) {
		SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
		return -1;
	}

	if (in_capsule_data_size) {
		/* One contiguous DMA-able region backs all in-capsule buffers. */
		tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
					    NULL, SPDK_ENV_LCORE_ID_ANY,
					    SPDK_MALLOC_DMA);
		if (!tqpair->bufs) {
			SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
			return -1;
		}
	}
	/* prepare memory space for receiving pdus and tcp_req */
	/* Add additional 1 member, which will be used for mgmt_pdu owned by the tqpair */
	tqpair->pdus = spdk_dma_zmalloc((2 * tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000,
					NULL);
	if (!tqpair->pdus) {
		SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
		return -1;
	}

	/* First resource_count PDUs are bound 1:1 to the requests. */
	for (i = 0; i < tqpair->resource_count; i++) {
		struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];

		/* ttag is 1-based; 0 is not a valid transfer tag. */
		tcp_req->ttag = i + 1;
		tcp_req->req.qpair = &tqpair->qpair;

		tcp_req->pdu = &tqpair->pdus[i];
		tcp_req->pdu->qpair = tqpair;

		/* Set up memory to receive commands */
		if (tqpair->bufs) {
			tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
		}

		/* Set the cmdn and rsp */
		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;

		tcp_req->req.stripped_data = NULL;

		/* Initialize request state to FREE */
		tcp_req->state = TCP_REQUEST_STATE_FREE;
		TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link);
		tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
	}

	/* Next resource_count PDUs form the receive free list. */
	for (; i < 2 * tqpair->resource_count; i++) {
		struct nvme_tcp_pdu *pdu = &tqpair->pdus[i];

		pdu->qpair = tqpair;
		SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
	}

	/* The final PDU (index 2 * resource_count) is the mgmt PDU. */
	tqpair->mgmt_pdu = &tqpair->pdus[i];
	tqpair->mgmt_pdu->qpair = tqpair;
	/* Pull one PDU off the free list to start receiving into. */
	tqpair->pdu_in_progress = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
	SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
	tqpair->tcp_pdu_working_count = 1;

	/* Size the socket receive buffer for a full command PDU with both
	 * digests, scaled by the transport's buffering factor. */
	tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
				 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;

	return 0;
}
1434e956be96SZiye Yang 
1435e956be96SZiye Yang static int
14364de405abSSeth Howell nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1437e956be96SZiye Yang {
14382b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair *tqpair;
1439e956be96SZiye Yang 
14402b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1441e956be96SZiye Yang 
14422172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "New TCP Connection: %p\n", qpair);
1443e956be96SZiye Yang 
1444ea1a6608SJim Harris 	spdk_trace_record(TRACE_TCP_QP_CREATE, tqpair->qpair.trace_id, 0, 0);
14458107587bSKrzysztof Karas 
1446e956be96SZiye Yang 	/* Initialise request state queues of the qpair */
144760af3c00SRui Chang 	TAILQ_INIT(&tqpair->tcp_req_free_queue);
144860af3c00SRui Chang 	TAILQ_INIT(&tqpair->tcp_req_working_queue);
14497fc2c085SMengjinWu 	SLIST_INIT(&tqpair->tcp_pdu_free_queue);
1450b68ae4fbSAtul Malakar 	tqpair->qpair.queue_depth = 0;
1451e956be96SZiye Yang 
1452e956be96SZiye Yang 	tqpair->host_hdgst_enable = true;
1453e956be96SZiye Yang 	tqpair->host_ddgst_enable = true;
1454cb448c1bSBen Walker 
1455e956be96SZiye Yang 	return 0;
1456e956be96SZiye Yang }
1457e956be96SZiye Yang 
1458e956be96SZiye Yang static int
14594de405abSSeth Howell nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1460e956be96SZiye Yang {
1461ea1a6608SJim Harris 	char saddr[32], caddr[32];
1462ea1a6608SJim Harris 	uint16_t sport, cport;
1463ea1a6608SJim Harris 	char owner[256];
1464e956be96SZiye Yang 	int rc;
1465e956be96SZiye Yang 
146613bb2619SJim Harris 	rc = spdk_sock_getaddr(tqpair->sock, saddr, sizeof(saddr), &sport,
1467ea1a6608SJim Harris 			       caddr, sizeof(caddr), &cport);
146813bb2619SJim Harris 	if (rc != 0) {
146913bb2619SJim Harris 		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
147013bb2619SJim Harris 		return rc;
147113bb2619SJim Harris 	}
1472ea1a6608SJim Harris 	snprintf(owner, sizeof(owner), "%s:%d", caddr, cport);
1473ea1a6608SJim Harris 	tqpair->qpair.trace_id = spdk_trace_register_owner(OWNER_TYPE_NVMF_TCP, owner);
1474ea1a6608SJim Harris 	spdk_trace_record(TRACE_TCP_QP_SOCK_INIT, tqpair->qpair.trace_id, 0, 0);
14758107587bSKrzysztof Karas 
1476e956be96SZiye Yang 	/* set low water mark */
147720cd4841SBinYang0 	rc = spdk_sock_set_recvlowat(tqpair->sock, 1);
1478e956be96SZiye Yang 	if (rc != 0) {
1479e956be96SZiye Yang 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1480e956be96SZiye Yang 		return rc;
1481e956be96SZiye Yang 	}
1482e956be96SZiye Yang 
1483e956be96SZiye Yang 	return 0;
1484e956be96SZiye Yang }
1485e956be96SZiye Yang 
/* Accept-path handler: wrap a freshly accepted socket in a new tqpair and
 * hand it to the generic nvmf target for poll-group assignment.  On any
 * failure the socket is closed / the qpair destroyed and the connection
 * silently dropped. */
static void
nvmf_tcp_handle_connect(struct spdk_nvmf_tcp_port *port, struct spdk_sock *sock)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	int rc;

	SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n",
		      port->trid->traddr, port->trid->trsvcid);

	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
	if (tqpair == NULL) {
		SPDK_ERRLOG("Could not allocate new connection.\n");
		spdk_sock_close(&sock);
		return;
	}

	tqpair->sock = sock;
	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
	tqpair->port = port;
	tqpair->qpair.transport = port->transport;
	/* Record the socket's NUMA id on the qpair. */
	tqpair->qpair.numa.id_valid = 1;
	tqpair->qpair.numa.id = spdk_sock_get_numa_id(sock);

	/* Cache both endpoint addresses on the qpair. */
	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
			       sizeof(tqpair->target_addr), &tqpair->target_port,
			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
			       &tqpair->initiator_port);
	if (rc < 0) {
		SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_destroy(tqpair);
		return;
	}

	spdk_nvmf_tgt_new_qpair(port->transport->tgt, &tqpair->qpair);
}
1521e956be96SZiye Yang 
1522e7e10859SMaciej Szwed static uint32_t
1523cff94374SKrzysztof Goreczny nvmf_tcp_port_accept(struct spdk_nvmf_tcp_port *port)
1524e956be96SZiye Yang {
1525e956be96SZiye Yang 	struct spdk_sock *sock;
1526e7e10859SMaciej Szwed 	uint32_t count = 0;
1527e956be96SZiye Yang 	int i;
1528e956be96SZiye Yang 
1529e956be96SZiye Yang 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1530e956be96SZiye Yang 		sock = spdk_sock_accept(port->listen_sock);
1531262ecf0eSJim Harris 		if (sock == NULL) {
1532262ecf0eSJim Harris 			break;
1533e956be96SZiye Yang 		}
1534e7e10859SMaciej Szwed 		count++;
1535cff94374SKrzysztof Goreczny 		nvmf_tcp_handle_connect(port, sock);
1536e956be96SZiye Yang 	}
1537e7e10859SMaciej Szwed 
1538e7e10859SMaciej Szwed 	return count;
1539e956be96SZiye Yang }
1540e956be96SZiye Yang 
154143022da3SJacek Kalwas static int
154243022da3SJacek Kalwas nvmf_tcp_accept(void *ctx)
1543e956be96SZiye Yang {
154443022da3SJacek Kalwas 	struct spdk_nvmf_transport *transport = ctx;
1545e956be96SZiye Yang 	struct spdk_nvmf_tcp_transport *ttransport;
15460e983c56SKrzysztof Goreczny 	int count;
1547e956be96SZiye Yang 
1548e956be96SZiye Yang 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1549e956be96SZiye Yang 
15500e983c56SKrzysztof Goreczny 	count = spdk_sock_group_poll(ttransport->listen_sock_group);
15510e983c56SKrzysztof Goreczny 	if (count < 0) {
15520e983c56SKrzysztof Goreczny 		SPDK_ERRLOG("Fail in TCP listen socket group poll\n");
1553e956be96SZiye Yang 	}
1554e7e10859SMaciej Szwed 
15550e983c56SKrzysztof Goreczny 	return count != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
15560e983c56SKrzysztof Goreczny }
15570e983c56SKrzysztof Goreczny 
/* Listen sock group callback: ctx is the listening port.  group/sock are
 * unused because the port already carries its own listen socket. */
static void
nvmf_tcp_accept_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	nvmf_tcp_port_accept(ctx);
}
1565e956be96SZiye Yang 
/* Fill in a discovery log page entry for one of our listeners.  The trid is
 * expected to match a port we are actively listening on (asserted).
 * Listeners backed by the "ssl" sock implementation are advertised as
 * requiring a TLS 1.3 secure channel. */
static void
nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
		  struct spdk_nvme_transport_id *trid,
		  struct spdk_nvmf_discovery_log_page_entry *entry)
{
	struct spdk_nvmf_tcp_port *port;
	struct spdk_nvmf_tcp_transport *ttransport;

	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
	entry->adrfam = trid->adrfam;

	/* Discovery log string fields are fixed-width and space-padded. */
	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	port = nvmf_tcp_find_port(ttransport, trid);

	assert(port != NULL);

	if (strcmp(spdk_sock_get_impl_name(port->listen_sock), "ssl") == 0) {
		entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED;
		entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_TLS_1_3;
	} else {
		entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
		entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
	}
}
1593e956be96SZiye Yang 
159485fa4324SAlexey Marchuk static struct spdk_nvmf_tcp_control_msg_list *
159585fa4324SAlexey Marchuk nvmf_tcp_control_msg_list_create(uint16_t num_messages)
159685fa4324SAlexey Marchuk {
159785fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_control_msg_list *list;
159885fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_control_msg *msg;
159985fa4324SAlexey Marchuk 	uint16_t i;
160085fa4324SAlexey Marchuk 
160185fa4324SAlexey Marchuk 	list = calloc(1, sizeof(*list));
160285fa4324SAlexey Marchuk 	if (!list) {
160385fa4324SAlexey Marchuk 		SPDK_ERRLOG("Failed to allocate memory for list structure\n");
160485fa4324SAlexey Marchuk 		return NULL;
160585fa4324SAlexey Marchuk 	}
160685fa4324SAlexey Marchuk 
160785fa4324SAlexey Marchuk 	list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE,
1608186b109dSJim Harris 				     NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
160985fa4324SAlexey Marchuk 	if (!list->msg_buf) {
161085fa4324SAlexey Marchuk 		SPDK_ERRLOG("Failed to allocate memory for control message buffers\n");
161185fa4324SAlexey Marchuk 		free(list);
161285fa4324SAlexey Marchuk 		return NULL;
161385fa4324SAlexey Marchuk 	}
161485fa4324SAlexey Marchuk 
161585fa4324SAlexey Marchuk 	STAILQ_INIT(&list->free_msgs);
1616c7d22538SKrzysztof Goreczny 	STAILQ_INIT(&list->waiting_for_msg_reqs);
161785fa4324SAlexey Marchuk 
161885fa4324SAlexey Marchuk 	for (i = 0; i < num_messages; i++) {
161985fa4324SAlexey Marchuk 		msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i *
162085fa4324SAlexey Marchuk 				SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
162185fa4324SAlexey Marchuk 		STAILQ_INSERT_TAIL(&list->free_msgs, msg, link);
162285fa4324SAlexey Marchuk 	}
162385fa4324SAlexey Marchuk 
162485fa4324SAlexey Marchuk 	return list;
162585fa4324SAlexey Marchuk }
162685fa4324SAlexey Marchuk 
162785fa4324SAlexey Marchuk static void
162885fa4324SAlexey Marchuk nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list)
162985fa4324SAlexey Marchuk {
163085fa4324SAlexey Marchuk 	if (!list) {
163185fa4324SAlexey Marchuk 		return;
163285fa4324SAlexey Marchuk 	}
163385fa4324SAlexey Marchuk 
163485fa4324SAlexey Marchuk 	spdk_free(list->msg_buf);
163585fa4324SAlexey Marchuk 	free(list);
163685fa4324SAlexey Marchuk }
163785fa4324SAlexey Marchuk 
163881fc34dfSKrzysztof Goreczny static int nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group);
163981fc34dfSKrzysztof Goreczny 
164081fc34dfSKrzysztof Goreczny static int
164181fc34dfSKrzysztof Goreczny nvmf_tcp_poll_group_intr(void *ctx)
164281fc34dfSKrzysztof Goreczny {
164381fc34dfSKrzysztof Goreczny 	struct spdk_nvmf_transport_poll_group *group = ctx;
164481fc34dfSKrzysztof Goreczny 	int ret = 0;
164581fc34dfSKrzysztof Goreczny 
164681fc34dfSKrzysztof Goreczny 	ret = nvmf_tcp_poll_group_poll(group);
164781fc34dfSKrzysztof Goreczny 
164881fc34dfSKrzysztof Goreczny 	return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
164981fc34dfSKrzysztof Goreczny }
165081fc34dfSKrzysztof Goreczny 
/* Create a transport poll group: a sock group for member qpairs, an
 * optional control-message pool (only needed when the configured in-capsule
 * data size is too small for admin/fabrics commands), and an accel channel.
 * In interrupt mode the sock group is additionally wired to an interrupt
 * handler.  On any failure, everything allocated so far is released via
 * nvmf_tcp_poll_group_destroy and NULL is returned. */
static struct spdk_nvmf_transport_poll_group *
nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport,
			   struct spdk_nvmf_poll_group *group)
{
	struct spdk_nvmf_tcp_transport	*ttransport;
	struct spdk_nvmf_tcp_poll_group *tgroup;
	int rc;

	tgroup = calloc(1, sizeof(*tgroup));
	if (!tgroup) {
		return NULL;
	}

	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
	if (!tgroup->sock_group) {
		goto cleanup;
	}

	TAILQ_INIT(&tgroup->qpairs);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). "
			      "Creating control messages list\n", transport->opts.in_capsule_data_size,
			      SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(ttransport->tcp_opts.control_msg_num);
		if (!tgroup->control_msg_list) {
			goto cleanup;
		}
	}

	/* Channel for offloading data-digest CRC32C computation. */
	tgroup->accel_channel = spdk_accel_get_io_channel();
	if (spdk_unlikely(!tgroup->accel_channel)) {
		SPDK_ERRLOG("Cannot create accel_channel for tgroup=%p\n", tgroup);
		goto cleanup;
	}

	/* Track the group on the transport; seed the round-robin cursor used
	 * by nvmf_tcp_get_optimal_poll_group if it is not set yet. */
	TAILQ_INSERT_TAIL(&ttransport->poll_groups, tgroup, link);
	if (ttransport->next_pg == NULL) {
		ttransport->next_pg = tgroup;
	}

	if (spdk_interrupt_mode_is_enabled()) {
		rc = SPDK_SOCK_GROUP_REGISTER_INTERRUPT(tgroup->sock_group,
							SPDK_INTERRUPT_EVENT_IN | SPDK_INTERRUPT_EVENT_OUT, nvmf_tcp_poll_group_intr, &tgroup->group);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to register interrupt for sock group\n");
			goto cleanup;
		}
	}

	return &tgroup->group;

cleanup:
	nvmf_tcp_poll_group_destroy(&tgroup->group);
	return NULL;
}
1709e956be96SZiye Yang 
/* Choose the poll group best suited to host a new qpair.  The transport's
 * next_pg cursor supplies a round-robin hint; the sock layer may override
 * it with a better placement for this socket.  The cursor only advances
 * when the hint itself is used, keeping assignments balanced. */
static struct spdk_nvmf_transport_poll_group *
nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_poll_group **pg;
	struct spdk_nvmf_tcp_qpair *tqpair;
	struct spdk_sock_group *group = NULL, *hint = NULL;
	int rc;

	ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);

	if (TAILQ_EMPTY(&ttransport->poll_groups)) {
		/* No poll groups exist yet. */
		return NULL;
	}

	pg = &ttransport->next_pg;
	assert(*pg != NULL);
	hint = (*pg)->sock_group;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, hint);
	if (rc != 0) {
		return NULL;
	} else if (group != NULL) {
		/* Optimal poll group was found */
		return spdk_sock_group_get_ctx(group);
	}

	/* The hint was used for optimal poll group, advance next_pg. */
	*pg = TAILQ_NEXT(*pg, link);
	if (*pg == NULL) {
		/* Wrap the round-robin cursor back to the first group. */
		*pg = TAILQ_FIRST(&ttransport->poll_groups);
	}

	return spdk_sock_group_get_ctx(hint);
}
1746d619f6c2STomasz Zawadzki 
/* Tear down a TCP transport poll group: release its socket group, interrupt
 * registration, control message list and accel channel, then unlink it from
 * the transport's poll group list while keeping the round-robin cursor valid.
 */
static void
nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
{
	struct spdk_nvmf_tcp_poll_group *tgroup, *next_tgroup;
	struct spdk_nvmf_tcp_transport *ttransport;

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
	spdk_sock_group_unregister_interrupt(tgroup->sock_group);
	spdk_sock_group_close(&tgroup->sock_group);
	/* These members may be NULL when destroy is invoked from a partially
	 * constructed group (create-failure path); guard each one. */
	if (tgroup->control_msg_list) {
		nvmf_tcp_control_msg_list_free(tgroup->control_msg_list);
	}

	if (tgroup->accel_channel) {
		spdk_put_io_channel(tgroup->accel_channel);
	}

	if (tgroup->group.transport == NULL) {
		/* Transport can be NULL when nvmf_tcp_poll_group_create()
		 * calls this function directly in a failure path. */
		free(tgroup);
		return;
	}

	ttransport = SPDK_CONTAINEROF(tgroup->group.transport, struct spdk_nvmf_tcp_transport, transport);

	/* Unlink from the transport list.  If the round-robin cursor (next_pg)
	 * pointed at this group, advance it to the next group, wrapping to the
	 * list head when this was the tail. */
	next_tgroup = TAILQ_NEXT(tgroup, link);
	TAILQ_REMOVE(&ttransport->poll_groups, tgroup, link);
	if (next_tgroup == NULL) {
		next_tgroup = TAILQ_FIRST(&ttransport->poll_groups);
	}
	if (ttransport->next_pg == tgroup) {
		ttransport->next_pg = next_tgroup;
	}

	free(tgroup);
}
1784e956be96SZiye Yang 
/* Transition the qpair's PDU receive state machine to a new state and emit
 * a trace record for the change.  Re-entering the current state is treated
 * as a state-machine bug and only logged. */
static void
nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
			      enum nvme_tcp_pdu_recv_state state)
{
	if (tqpair->recv_state == state) {
		SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n",
			    tqpair, state);
		return;
	}

	if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_QUIESCING)) {
		/* Quiescing while still waiting for a common header means the
		 * in-progress PDU never received any payload - return it to the
		 * free queue so it is not leaked and the working count stays
		 * consistent. */
		if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH && tqpair->pdu_in_progress) {
			SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, tqpair->pdu_in_progress, slist);
			tqpair->tcp_pdu_working_count--;
		}
	}

	if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_ERROR)) {
		/* All outstanding PDUs must be drained before entering ERROR. */
		assert(tqpair->tcp_pdu_working_count == 0);
	}

	SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state);
	tqpair->recv_state = state;

	spdk_trace_record(TRACE_TCP_QP_RCV_STATE_CHANGE, tqpair->qpair.trace_id, 0, 0,
			  (uint64_t)tqpair->recv_state);
}
1812e956be96SZiye Yang 
18132d0ce5b4SZiye Yang static int
18144de405abSSeth Howell nvmf_tcp_qpair_handle_timeout(void *ctx)
18152d0ce5b4SZiye Yang {
18162d0ce5b4SZiye Yang 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
18172d0ce5b4SZiye Yang 
18182d0ce5b4SZiye Yang 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
18192d0ce5b4SZiye Yang 
18202d0ce5b4SZiye Yang 	SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair,
18212d0ce5b4SZiye Yang 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
18222d0ce5b4SZiye Yang 
18234de405abSSeth Howell 	nvmf_tcp_qpair_disconnect(tqpair);
1824eb05cbd6SMaciej Szwed 	return SPDK_POLLER_BUSY;
18252d0ce5b4SZiye Yang }
18262d0ce5b4SZiye Yang 
1827e956be96SZiye Yang static void
18284de405abSSeth Howell nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1829e956be96SZiye Yang {
18302d0ce5b4SZiye Yang 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
18312d0ce5b4SZiye Yang 
18322d0ce5b4SZiye Yang 	if (!tqpair->timeout_poller) {
18334de405abSSeth Howell 		tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair,
18342d0ce5b4SZiye Yang 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
18352d0ce5b4SZiye Yang 	}
1836e956be96SZiye Yang }
1837e956be96SZiye Yang 
/* Build and send a C2H Terminate Connection Request PDU on the qpair's
 * management PDU, carrying the Fatal Error Status (fes) and, for the two
 * header-field error codes, the offending byte offset.  The header of the
 * PDU that caused the error is echoed back as the payload.  Receiving is
 * quiesced; no further inbound PDUs will be processed. */
static void
nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
			   enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
{
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
	uint32_t copy_len;

	rsp_pdu = tqpair->mgmt_pdu;

	c2h_term_req = &rsp_pdu->hdr.term_req;
	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
	c2h_term_req->fes = fes;

	/* The Fatal Error Information field is only defined for these two codes. */
	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		DSET32(&c2h_term_req->fei, error_offset);
	}

	/* Echo at most the spec-limited amount of the offending PDU header. */
	copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);

	/* Copy the error info into the buffer */
	memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);

	/* Contain the header of the wrong received pdu */
	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
	tqpair->wait_terminate = true;
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
	nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_c2h_term_req_complete, tqpair);
}
1871e956be96SZiye Yang 
/* Handle the header of a CAPSULE_CMD PDU: allocate a request object from the
 * qpair's pool, attach it to the PDU and start the request state machine. */
static void
nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
				struct spdk_nvmf_tcp_qpair *tqpair,
				struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;

	assert(pdu->psh_valid_bytes == pdu->psh_len);
	assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD);

	tcp_req = nvmf_tcp_req_get(tqpair);
	if (!tcp_req) {
		/* Directly return and make the allocation retry again.  This can happen if we're
		 * using asynchronous writes to send the response to the host or when releasing
		 * zero-copy buffers after a response has been sent.  In both cases, the host might
		 * receive the response before we've finished processing the request and is free to
		 * send another one.
		 */
		if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0 ||
		    tqpair->state_cntr[TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE] > 0) {
			return;
		}

		/* The host sent more commands than the maximum queue depth. */
		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
		return;
	}

	pdu->req = tcp_req;
	assert(tcp_req->state == TCP_REQUEST_STATE_NEW);
	nvmf_tcp_req_process(ttransport, tcp_req);
}
1905e956be96SZiye Yang 
1906e956be96SZiye Yang static void
19074de405abSSeth Howell nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
19082b59852bSBen Walker 				    struct spdk_nvmf_tcp_qpair *tqpair,
1909e956be96SZiye Yang 				    struct nvme_tcp_pdu *pdu)
1910e956be96SZiye Yang {
1911c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req;
1912e956be96SZiye Yang 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1913e956be96SZiye Yang 	uint32_t error_offset = 0;
1914e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1915252430a0SZiye Yang 	struct spdk_nvme_cpl *rsp;
1916e956be96SZiye Yang 
1917ea65bf61SBen Walker 	capsule_cmd = &pdu->hdr.capsule_cmd;
191883ffb207SBen Walker 	tcp_req = pdu->req;
1919e956be96SZiye Yang 	assert(tcp_req != NULL);
1920252430a0SZiye Yang 
1921794d47d4SKonrad Sztyber 	/* Zero-copy requests don't support ICD */
1922794d47d4SKonrad Sztyber 	assert(!spdk_nvmf_request_using_zcopy(&tcp_req->req));
1923794d47d4SKonrad Sztyber 
1924e956be96SZiye Yang 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1925e956be96SZiye Yang 		SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n",
1926e956be96SZiye Yang 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1927e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1928e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1929e956be96SZiye Yang 		goto err;
1930e956be96SZiye Yang 	}
1931e956be96SZiye Yang 
1932252430a0SZiye Yang 	rsp = &tcp_req->req.rsp->nvme_cpl;
1933252430a0SZiye Yang 	if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) {
1934252430a0SZiye Yang 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
1935252430a0SZiye Yang 	} else {
19364de405abSSeth Howell 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1937252430a0SZiye Yang 	}
1938252430a0SZiye Yang 
19394de405abSSeth Howell 	nvmf_tcp_req_process(ttransport, tcp_req);
1940e956be96SZiye Yang 
1941e956be96SZiye Yang 	return;
1942e956be96SZiye Yang err:
19434de405abSSeth Howell 	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1944e956be96SZiye Yang }
1945e956be96SZiye Yang 
/* Handle the header of an H2C_DATA PDU.  Looks up the owning request by the
 * transfer tag (ttag), validates tag, state, command id, data offset and
 * length against the outstanding R2T, then sets up the data buffer and
 * switches the receive state machine to payload reception.  Any validation
 * failure terminates the connection with a C2H terminate request. */
static void
nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
			     struct spdk_nvmf_tcp_qpair *tqpair,
			     struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes = 0;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;

	h2c_data = &pdu->hdr.h2c_data;

	SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);

	/* ttag is 1-based and must index into the qpair's request array. */
	if (h2c_data->ttag > tqpair->resource_count) {
		SPDK_DEBUGLOG(nvmf_tcp, "ttag %u is larger than allowed %u.\n", h2c_data->ttag,
			      tqpair->resource_count);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	tcp_req = &tqpair->reqs[h2c_data->ttag - 1];

	/* H2C data is only valid for a request that is actively transferring
	 * or still waiting for its R2T send to be acknowledged. */
	if (spdk_unlikely(tcp_req->state != TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER &&
			  tcp_req->state != TCP_REQUEST_STATE_AWAITING_R2T_ACK)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, has error state in %d\n", tcp_req, tqpair,
			      tcp_req->state);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	/* The command id must match the request the ttag resolved to. */
	if (spdk_unlikely(tcp_req->req.cmd->nvme_cmd.cid != h2c_data->cccid)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, expected %u but %u for cccid.\n", tcp_req, tqpair,
			      tcp_req->req.cmd->nvme_cmd.cid, h2c_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
		goto err;
	}

	/* Data must arrive in order, continuing at the current H2C offset. */
	if (tcp_req->h2c_offset != h2c_data->datao) {
		SPDK_DEBUGLOG(nvmf_tcp,
			      "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n",
			      tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		goto err;
	}

	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
		SPDK_DEBUGLOG(nvmf_tcp,
			      "tcp_req(%p), tqpair=%p,  (datao=%u + datal=%u) exceeds requested length=%u\n",
			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		goto err;
	}

	pdu->req = tcp_req;

	if (spdk_unlikely(tcp_req->req.dif_enabled)) {
		pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
	}

	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
				  h2c_data->datao, h2c_data->datal);
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;

err:
	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}
2018e956be96SZiye Yang 
2019e956be96SZiye Yang static void
20204de405abSSeth Howell nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
20212b59852bSBen Walker 			       struct spdk_nvmf_tcp_qpair *tqpair)
2022e956be96SZiye Yang {
2023e956be96SZiye Yang 	struct nvme_tcp_pdu *rsp_pdu;
2024e956be96SZiye Yang 	struct spdk_nvme_tcp_rsp *capsule_resp;
2025e956be96SZiye Yang 
20262172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "enter, tqpair=%p\n", tqpair);
2027a2adca79SBen Walker 
2028a2adca79SBen Walker 	rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
202904a4aab2SBen Walker 	assert(rsp_pdu != NULL);
2030e956be96SZiye Yang 
2031ea65bf61SBen Walker 	capsule_resp = &rsp_pdu->hdr.capsule_resp;
2032e956be96SZiye Yang 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
2033e956be96SZiye Yang 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
2034e956be96SZiye Yang 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
2035e956be96SZiye Yang 	if (tqpair->host_hdgst_enable) {
2036e956be96SZiye Yang 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2037e956be96SZiye Yang 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
2038e956be96SZiye Yang 	}
2039e956be96SZiye Yang 
2040c676c081SKonrad Sztyber 	nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_request_free, tcp_req);
2041e956be96SZiye Yang }
2042e956be96SZiye Yang 
2043e956be96SZiye Yang static void
20444de405abSSeth Howell nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
2045e956be96SZiye Yang {
2046c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
20472b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
20482b59852bSBen Walker 					     struct spdk_nvmf_tcp_qpair, qpair);
2049e956be96SZiye Yang 
2050e956be96SZiye Yang 	assert(tqpair != NULL);
20513e5ea7ffSAlexey Marchuk 
20523e5ea7ffSAlexey Marchuk 	if (spdk_unlikely(tcp_req->pdu->rw_offset < tcp_req->req.length)) {
20533e5ea7ffSAlexey Marchuk 		SPDK_DEBUGLOG(nvmf_tcp, "sending another C2H part, offset %u length %u\n", tcp_req->pdu->rw_offset,
20543e5ea7ffSAlexey Marchuk 			      tcp_req->req.length);
20553e5ea7ffSAlexey Marchuk 		_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
20563e5ea7ffSAlexey Marchuk 		return;
20573e5ea7ffSAlexey Marchuk 	}
20583e5ea7ffSAlexey Marchuk 
2059b1144ce3SEd rodriguez 	if (tcp_req->pdu->hdr.c2h_data.common.flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
2060e956be96SZiye Yang 		nvmf_tcp_request_free(tcp_req);
20616629202cSOr Gerlitz 	} else {
20624de405abSSeth Howell 		nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
20636629202cSOr Gerlitz 	}
2064e956be96SZiye Yang }
2065e956be96SZiye Yang 
2066e956be96SZiye Yang static void
20674de405abSSeth Howell nvmf_tcp_r2t_complete(void *cb_arg)
206863a60a0cSBen Walker {
2069a2adca79SBen Walker 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
207048a547fdSBen Walker 	struct spdk_nvmf_tcp_transport *ttransport;
2071a2adca79SBen Walker 
207248a547fdSBen Walker 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
207348a547fdSBen Walker 				      struct spdk_nvmf_tcp_transport, transport);
207448a547fdSBen Walker 
20754de405abSSeth Howell 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
207648a547fdSBen Walker 
207748a547fdSBen Walker 	if (tcp_req->h2c_offset == tcp_req->req.length) {
20784de405abSSeth Howell 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
20794de405abSSeth Howell 		nvmf_tcp_req_process(ttransport, tcp_req);
208048a547fdSBen Walker 	}
208163a60a0cSBen Walker }
208263a60a0cSBen Walker 
/* Build and queue an R2T PDU inviting the host to send the request's data,
 * starting at the current H2C offset for the full request length.  The
 * request waits in AWAITING_R2T_ACK until the write completes
 * (nvmf_tcp_r2t_complete). */
static void
nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
		      struct spdk_nvmf_tcp_req *tcp_req)
{
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_r2t_hdr *r2t;

	rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
	assert(rsp_pdu != NULL);

	r2t = &rsp_pdu->hdr.r2t;
	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);

	if (tqpair->host_hdgst_enable) {
		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
	/* ttag lets the later H2C_DATA PDUs be matched back to this request. */
	r2t->ttag = tcp_req->ttag;
	r2t->r2to = tcp_req->h2c_offset;
	r2t->r2tl = tcp_req->req.length;

	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK);

	SPDK_DEBUGLOG(nvmf_tcp,
		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
	nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_r2t_complete, tcp_req);
}
2114e956be96SZiye Yang 
/* Handle the payload of an H2C_DATA PDU: account the received bytes and,
 * once the full length has arrived and the R2T send was acknowledged,
 * advance the request toward execution (or completion on a previously
 * recorded transient transport error). */
static void
nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
				 struct spdk_nvmf_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvme_cpl *rsp;

	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");

	tcp_req->h2c_offset += pdu->data_len;

	/* Wait for all of the data to arrive AND for the initial R2T PDU send to be
	 * acknowledged before moving on. */
	if (tcp_req->h2c_offset == tcp_req->req.length &&
	    tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) {
		/* After receiving all the h2c data, we need to check whether there is
		 * transient transport error */
		rsp = &tcp_req->req.rsp->nvme_cpl;
		if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) {
			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
		} else {
			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
		}
		nvmf_tcp_req_process(ttransport, tcp_req);
	}
}
2145e956be96SZiye Yang 
2146e956be96SZiye Yang static void
21474de405abSSeth Howell nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
2148e956be96SZiye Yang {
2149e956be96SZiye Yang 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
21502d07fa15SBen Walker 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
2151e956be96SZiye Yang 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
2152e956be96SZiye Yang 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
21532172c432STomasz Zawadzki 		SPDK_DEBUGLOG(nvmf_tcp, "The offset from the start of the PDU header is %u\n",
2154e956be96SZiye Yang 			      DGET32(h2c_term_req->fei));
2155e956be96SZiye Yang 	}
2156e956be96SZiye Yang }
2157e956be96SZiye Yang 
/* Handle the header of a host-to-controller terminate request PDU.  An
 * out-of-range FES is itself a protocol error and is answered with a C2H
 * terminate request; otherwise the (already received) header bytes beyond
 * hlen are set up as the payload to be drained. */
static void
nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
		SPDK_ERRLOG("Fatal Error Status(FES) is unknown for h2c_term_req pdu=%p\n", pdu);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
		goto end;
	}

	/* set the data buffer */
	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen,
			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;
end:
	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}
2181e956be96SZiye Yang 
2182e956be96SZiye Yang static void
21834de405abSSeth Howell nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
2184e956be96SZiye Yang 				     struct nvme_tcp_pdu *pdu)
2185e956be96SZiye Yang {
2186ea65bf61SBen Walker 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
2187e956be96SZiye Yang 
21884de405abSSeth Howell 	nvmf_tcp_h2c_term_req_dump(h2c_term_req);
218978df9be4SBen Walker 	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
2190e956be96SZiye Yang }
2191e956be96SZiye Yang 
/* Dispatch a fully received PDU payload to its type-specific handler, then
 * return the PDU to the qpair's free queue. */
static void
_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
			struct spdk_nvmf_tcp_transport, transport);

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
		nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
		nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
		break;

	default:
		/* The code should not go to here */
		SPDK_ERRLOG("ERROR pdu type %d\n", pdu->hdr.common.pdu_type);
		break;
	}
	/* The PDU is done; recycle it and drop the working count. */
	SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
	tqpair->tcp_pdu_working_count--;
}
2218bc35dab9SZiye Yang 
22192a07b467SKonrad Sztyber static inline void
22202a07b467SKonrad Sztyber nvmf_tcp_req_set_cpl(struct spdk_nvmf_tcp_req *treq, int sct, int sc)
22212a07b467SKonrad Sztyber {
22222a07b467SKonrad Sztyber 	treq->req.rsp->nvme_cpl.status.sct = sct;
22232a07b467SKonrad Sztyber 	treq->req.rsp->nvme_cpl.status.sc = sc;
22242a07b467SKonrad Sztyber 	treq->req.rsp->nvme_cpl.cid = treq->req.cmd->nvme_cmd.cid;
22252a07b467SKonrad Sztyber }
22262a07b467SKonrad Sztyber 
/* Completion callback for the (possibly accel-offloaded) data digest CRC32C
 * calculation.  Falls back to a synchronous calculation if the offload
 * failed, verifies the digest against the PDU, and records a transient
 * transport error in the request on mismatch before finishing payload
 * handling. */
static void
data_crc32_calc_done(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	/* async crc32 calculation is failed and use direct calculation to check */
	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Data digest on tqpair=(%p) with pdu=%p failed to be calculated asynchronously\n",
			    tqpair, pdu);
		pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
	}
	/* Final XOR step of the CRC32C data digest per the NVMe/TCP scheme used here. */
	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	if (!MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32)) {
		SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
		assert(pdu->req != NULL);
		/* Don't terminate the connection; fail just this command. */
		nvmf_tcp_req_set_cpl(pdu->req, SPDK_NVME_SCT_GENERIC,
				     SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR);
	}
	_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
}
2248e956be96SZiye Yang 
22497fc2c085SMengjinWu static void
22507fc2c085SMengjinWu nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
22517fc2c085SMengjinWu {
22527fc2c085SMengjinWu 	int rc = 0;
22537fc2c085SMengjinWu 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
22547fc2c085SMengjinWu 	tqpair->pdu_in_progress = NULL;
22557fc2c085SMengjinWu 	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
22567fc2c085SMengjinWu 	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");
22577fc2c085SMengjinWu 	/* check data digest if need */
22587fc2c085SMengjinWu 	if (pdu->ddgst_enable) {
22599e9da467SMengjinWu 		if (tqpair->qpair.qid != 0 && !pdu->dif_ctx && tqpair->group &&
22609e9da467SMengjinWu 		    (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)) {
22617fc2c085SMengjinWu 			rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
22627fc2c085SMengjinWu 						       pdu->data_iovcnt, 0, data_crc32_calc_done, pdu);
22637fc2c085SMengjinWu 			if (spdk_likely(rc == 0)) {
22647fc2c085SMengjinWu 				return;
22657fc2c085SMengjinWu 			}
22667fc2c085SMengjinWu 		} else {
22677fc2c085SMengjinWu 			pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
22687fc2c085SMengjinWu 		}
22697fc2c085SMengjinWu 		data_crc32_calc_done(pdu, rc);
22707fc2c085SMengjinWu 	} else {
22717fc2c085SMengjinWu 		_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
22727fc2c085SMengjinWu 	}
2273e956be96SZiye Yang }
2274e956be96SZiye Yang 
2275e956be96SZiye Yang static void
22764de405abSSeth Howell nvmf_tcp_send_icresp_complete(void *cb_arg)
2277e956be96SZiye Yang {
22782b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
2279e956be96SZiye Yang 
2280b95aae63SKonrad Sztyber 	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_RUNNING);
2281e956be96SZiye Yang }
2282e956be96SZiye Yang 
/*
 * Handle an ICReq PDU: validate its fields, record the negotiated digest and
 * data-alignment settings on the qpair, resize the socket receive buffer
 * accordingly, and queue an ICResp back to the host. On a validation failure,
 * terminate the connection with a C2H term req pointing at the bad field.
 */
static void
nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
		      struct spdk_nvmf_tcp_qpair *tqpair,
		      struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_ic_resp *ic_resp;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	/* Only PFV 0 is defined currently */
	if (ic_req->pfv != 0) {
		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
		goto end;
	}

	/* HPDA is a 0's based value in units of dwords and must not exceed SPDK_NVME_TCP_HPDA_MAX */
	if (ic_req->hpda > SPDK_NVME_TCP_HPDA_MAX) {
		SPDK_ERRLOG("ICReq HPDA out of range 0 to 31, got %u\n", ic_req->hpda);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, hpda);
		goto end;
	}

	/* MAXR2T is 0's based */
	SPDK_DEBUGLOG(nvmf_tcp, "maxr2t =%u\n", (ic_req->maxr2t + 1u));

	/* Each disabled digest shrinks the amount of data expected per PDU, so
	 * trim the planned receive buffer size accordingly before applying it. */
	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
	if (!tqpair->host_hdgst_enable) {
		tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
	}

	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
	if (!tqpair->host_ddgst_enable) {
		tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
	}

	tqpair->recv_buf_size = spdk_max(tqpair->recv_buf_size, MIN_SOCK_PIPE_SIZE);
	/* Now that we know whether digests are enabled, properly size the receive buffer */
	if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) {
		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
			     tqpair,
			     tqpair->recv_buf_size);
		/* Not fatal. */
	}

	/* Controller PDU data alignment: clamp the host-requested HPDA to our maximum. */
	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
	SPDK_DEBUGLOG(nvmf_tcp, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);

	/* Build the ICResp in the dedicated management PDU and echo back the
	 * negotiated parameters. */
	rsp_pdu = tqpair->mgmt_pdu;

	ic_resp = &rsp_pdu->hdr.ic_resp;
	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
	ic_resp->pfv = 0;
	ic_resp->cpda = tqpair->cpda;
	ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size;
	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;

	SPDK_DEBUGLOG(nvmf_tcp, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
	SPDK_DEBUGLOG(nvmf_tcp, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);

	/* Stay in INITIALIZING until the ICResp write completes; the completion
	 * callback transitions the qpair to RUNNING. */
	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_INITIALIZING);
	nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_icresp_complete, tqpair);
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	return;
end:
	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}
2356e956be96SZiye Yang 
2357e956be96SZiye Yang static void
23584de405abSSeth Howell nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair,
2359e19fd311SZiye Yang 			struct spdk_nvmf_tcp_transport *ttransport)
2360e956be96SZiye Yang {
2361e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
2362e956be96SZiye Yang 	int rc;
2363e956be96SZiye Yang 	uint32_t crc32c, error_offset = 0;
2364e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
2365e956be96SZiye Yang 
2366e956be96SZiye Yang 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
2367f8ac678eSZiye Yang 	pdu = tqpair->pdu_in_progress;
2368e956be96SZiye Yang 
23692172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "pdu type of tqpair(%p) is %d\n", tqpair,
2370ea65bf61SBen Walker 		      pdu->hdr.common.pdu_type);
2371e956be96SZiye Yang 	/* check header digest if needed */
2372e956be96SZiye Yang 	if (pdu->has_hdgst) {
23732172c432STomasz Zawadzki 		SPDK_DEBUGLOG(nvmf_tcp, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
2374e956be96SZiye Yang 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
2375ea65bf61SBen Walker 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
2376e956be96SZiye Yang 		if (rc == 0) {
2377e956be96SZiye Yang 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
2378e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
23794de405abSSeth Howell 			nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
2380e956be96SZiye Yang 			return;
2381e956be96SZiye Yang 
2382e956be96SZiye Yang 		}
2383e956be96SZiye Yang 	}
2384e956be96SZiye Yang 
2385ea65bf61SBen Walker 	switch (pdu->hdr.common.pdu_type) {
2386e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
23874de405abSSeth Howell 		nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
2388e956be96SZiye Yang 		break;
2389e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
23904de405abSSeth Howell 		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ);
2391e956be96SZiye Yang 		break;
2392e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
23934de405abSSeth Howell 		nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
2394e956be96SZiye Yang 		break;
2395e956be96SZiye Yang 
2396e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
23974de405abSSeth Howell 		nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
2398e956be96SZiye Yang 		break;
2399e956be96SZiye Yang 
2400e956be96SZiye Yang 	default:
2401f8ac678eSZiye Yang 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress->hdr.common.pdu_type);
2402e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2403e956be96SZiye Yang 		error_offset = 1;
24044de405abSSeth Howell 		nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
2405e956be96SZiye Yang 		break;
2406e956be96SZiye Yang 	}
2407e956be96SZiye Yang }
2408e956be96SZiye Yang 
2409e956be96SZiye Yang static void
24104de405abSSeth Howell nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
2411e956be96SZiye Yang {
2412e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
2413e956be96SZiye Yang 	uint32_t error_offset = 0;
2414e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
2415e956be96SZiye Yang 	uint8_t expected_hlen, pdo;
2416e956be96SZiye Yang 	bool plen_error = false, pdo_error = false;
2417e956be96SZiye Yang 
2418e956be96SZiye Yang 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2419f8ac678eSZiye Yang 	pdu = tqpair->pdu_in_progress;
24207fc2c085SMengjinWu 	assert(pdu);
2421ea65bf61SBen Walker 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
2422b95aae63SKonrad Sztyber 		if (tqpair->state != NVMF_TCP_QPAIR_STATE_INVALID) {
2423e956be96SZiye Yang 			SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
2424e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
2425e956be96SZiye Yang 			goto err;
2426e956be96SZiye Yang 		}
2427e956be96SZiye Yang 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
2428ea65bf61SBen Walker 		if (pdu->hdr.common.plen != expected_hlen) {
2429e956be96SZiye Yang 			plen_error = true;
2430e956be96SZiye Yang 		}
2431e956be96SZiye Yang 	} else {
2432b95aae63SKonrad Sztyber 		if (tqpair->state != NVMF_TCP_QPAIR_STATE_RUNNING) {
2433cc6920a4SJosh Soref 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
2434e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
2435e956be96SZiye Yang 			goto err;
2436e956be96SZiye Yang 		}
2437e956be96SZiye Yang 
2438ea65bf61SBen Walker 		switch (pdu->hdr.common.pdu_type) {
2439e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
2440e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
2441ea65bf61SBen Walker 			pdo = pdu->hdr.common.pdo;
2442e7f6ff2dSWenhua Liu 			if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) {
2443e956be96SZiye Yang 				pdo_error = true;
2444e956be96SZiye Yang 				break;
2445e956be96SZiye Yang 			}
2446e956be96SZiye Yang 
2447ea65bf61SBen Walker 			if (pdu->hdr.common.plen < expected_hlen) {
2448e956be96SZiye Yang 				plen_error = true;
2449e956be96SZiye Yang 			}
2450e956be96SZiye Yang 			break;
2451e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
2452e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
2453ea65bf61SBen Walker 			pdo = pdu->hdr.common.pdo;
2454e7f6ff2dSWenhua Liu 			if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) {
2455e956be96SZiye Yang 				pdo_error = true;
2456e956be96SZiye Yang 				break;
2457e956be96SZiye Yang 			}
2458ea65bf61SBen Walker 			if (pdu->hdr.common.plen < expected_hlen) {
2459e956be96SZiye Yang 				plen_error = true;
2460e956be96SZiye Yang 			}
2461e956be96SZiye Yang 			break;
2462e956be96SZiye Yang 
2463e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
2464e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
2465ea65bf61SBen Walker 			if ((pdu->hdr.common.plen <= expected_hlen) ||
2466ea65bf61SBen Walker 			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
2467e956be96SZiye Yang 				plen_error = true;
2468e956be96SZiye Yang 			}
2469e956be96SZiye Yang 			break;
2470e956be96SZiye Yang 
2471e956be96SZiye Yang 		default:
2472ea65bf61SBen Walker 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
2473e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2474e956be96SZiye Yang 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
2475e956be96SZiye Yang 			goto err;
2476e956be96SZiye Yang 		}
2477e956be96SZiye Yang 	}
2478e956be96SZiye Yang 
2479ea65bf61SBen Walker 	if (pdu->hdr.common.hlen != expected_hlen) {
2480e956be96SZiye Yang 		SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n",
2481ea65bf61SBen Walker 			    pdu->hdr.common.pdu_type,
2482ea65bf61SBen Walker 			    expected_hlen, pdu->hdr.common.hlen, tqpair);
2483e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2484e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
2485e956be96SZiye Yang 		goto err;
2486e956be96SZiye Yang 	} else if (pdo_error) {
2487e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2488e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
2489e956be96SZiye Yang 	} else if (plen_error) {
2490e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2491e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
2492e956be96SZiye Yang 		goto err;
2493e956be96SZiye Yang 	} else {
24944de405abSSeth Howell 		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
2495f8ac678eSZiye Yang 		nvme_tcp_pdu_calc_psh_len(tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
2496e956be96SZiye Yang 		return;
2497e956be96SZiye Yang 	}
2498e956be96SZiye Yang err:
24994de405abSSeth Howell 	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
2500e956be96SZiye Yang }
2501e956be96SZiye Yang 
/*
 * Receive-side state machine for a TCP qpair. Drains the socket one PDU at a
 * time, advancing through CH -> PSH -> (REQ slot) -> payload states. Returns
 * NVME_TCP_PDU_IN_PROGRESS when more socket data is needed to make progress,
 * NVME_TCP_PDU_FATAL once the qpair has fully entered the ERROR state, or the
 * last nvme_tcp_read_* result otherwise. The outer do/while re-runs the switch
 * whenever a handler changed recv_state, allowing back-to-back transitions
 * within a single call.
 */
static int
nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	enum nvme_tcp_pdu_recv_state prev_state;
	uint32_t data_len;
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
			struct spdk_nvmf_tcp_transport, transport);

	/* The loop here is to allow for several back-to-back state changes. */
	do {
		prev_state = tqpair->recv_state;
		SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);

		pdu = tqpair->pdu_in_progress;
		/* Only these three states may legitimately run without an in-progress PDU. */
		assert(pdu != NULL ||
		       tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY ||
		       tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_QUIESCING ||
		       tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);

		switch (tqpair->recv_state) {
		/* Wait for the common header  */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
			/* Claim a PDU from the free list; if none is available, retry
			 * on the next poll without changing state. */
			if (!pdu) {
				pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
				if (spdk_unlikely(!pdu)) {
					return NVME_TCP_PDU_IN_PROGRESS;
				}
				SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
				tqpair->pdu_in_progress = pdu;
				tqpair->tcp_pdu_working_count++;
			}
			/* Zero only the reusable prefix; fields from 'qpair' onward persist. */
			memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair));
			nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
		/* FALLTHROUGH */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
			/* Do not read further PDUs while the ICResp is still in flight. */
			if (spdk_unlikely(tqpair->state == NVMF_TCP_QPAIR_STATE_INITIALIZING)) {
				return rc;
			}

			rc = nvme_tcp_read_data(tqpair->sock,
						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
						(void *)&pdu->hdr.common + pdu->ch_valid_bytes);
			if (rc < 0) {
				SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair);
				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
				break;
			} else if (rc > 0) {
				pdu->ch_valid_bytes += rc;
				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.trace_id, rc, 0);
			}

			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			/* The command header of this PDU has now been read from the socket. */
			nvmf_tcp_pdu_ch_handle(tqpair);
			break;
		/* Wait for the pdu specific header  */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
			rc = nvme_tcp_read_data(tqpair->sock,
						pdu->psh_len - pdu->psh_valid_bytes,
						(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
			if (rc < 0) {
				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
				break;
			} else if (rc > 0) {
				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.trace_id, rc, 0);
				pdu->psh_valid_bytes += rc;
			}

			if (pdu->psh_valid_bytes < pdu->psh_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			/* All header(ch, psh, head digits) of this PDU has now been read from the socket. */
			nvmf_tcp_pdu_psh_handle(tqpair, ttransport);
			break;
		/* Wait for the req slot */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
			nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
			break;
		/* Wait for the request processing loop to acquire a buffer for the PDU */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF:
			break;
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
			/* check whether the data is valid, if not we just return */
			if (!pdu->data_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			data_len = pdu->data_len;
			/* data digest */
			if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
					  tqpair->host_ddgst_enable)) {
				/* The trailing 4-byte digest is read along with the payload. */
				data_len += SPDK_NVME_TCP_DIGEST_LEN;
				pdu->ddgst_enable = true;
			}

			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
			if (rc < 0) {
				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
				break;
			}
			pdu->rw_offset += rc;

			if (pdu->rw_offset < data_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			/* Generate and insert DIF to whole data block received if DIF is enabled */
			if (spdk_unlikely(pdu->dif_ctx != NULL) &&
			    spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 0, data_len,
						     pdu->dif_ctx) != 0) {
				SPDK_ERRLOG("DIF generate failed\n");
				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
				break;
			}

			/* All of this PDU has now been read from the socket. */
			nvmf_tcp_pdu_payload_handle(tqpair, pdu);
			break;
		case NVME_TCP_PDU_RECV_STATE_QUIESCING:
			/* Stay quiescing until every outstanding PDU has been returned. */
			if (tqpair->tcp_pdu_working_count != 0) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}
			nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
			break;
		case NVME_TCP_PDU_RECV_STATE_ERROR:
			/* If we are waiting for the peer to terminate, keep polling while
			 * the socket remains connected. */
			if (spdk_sock_is_connected(tqpair->sock) && tqpair->wait_terminate) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}
			return NVME_TCP_PDU_FATAL;
		default:
			SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state);
			abort();
			break;
		}
	} while (tqpair->recv_state != prev_state);

	return rc;
}
2646e956be96SZiye Yang 
264785fa4324SAlexey Marchuk static inline void *
2648c7d22538SKrzysztof Goreczny nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list,
2649c7d22538SKrzysztof Goreczny 			 struct spdk_nvmf_tcp_req *tcp_req)
265085fa4324SAlexey Marchuk {
265185fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_control_msg *msg;
265285fa4324SAlexey Marchuk 
265385fa4324SAlexey Marchuk 	assert(list);
265485fa4324SAlexey Marchuk 
265585fa4324SAlexey Marchuk 	msg = STAILQ_FIRST(&list->free_msgs);
265685fa4324SAlexey Marchuk 	if (!msg) {
265785fa4324SAlexey Marchuk 		SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n");
2658c7d22538SKrzysztof Goreczny 		STAILQ_INSERT_TAIL(&list->waiting_for_msg_reqs, tcp_req, control_msg_link);
265985fa4324SAlexey Marchuk 		return NULL;
266085fa4324SAlexey Marchuk 	}
266185fa4324SAlexey Marchuk 	STAILQ_REMOVE_HEAD(&list->free_msgs, link);
266285fa4324SAlexey Marchuk 	return msg;
266385fa4324SAlexey Marchuk }
266485fa4324SAlexey Marchuk 
266585fa4324SAlexey Marchuk static inline void
266685fa4324SAlexey Marchuk nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg)
266785fa4324SAlexey Marchuk {
266885fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_control_msg *msg = _msg;
2669c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_req *tcp_req;
2670c7d22538SKrzysztof Goreczny 	struct spdk_nvmf_tcp_transport *ttransport;
267185fa4324SAlexey Marchuk 
267285fa4324SAlexey Marchuk 	assert(list);
267385fa4324SAlexey Marchuk 	STAILQ_INSERT_HEAD(&list->free_msgs, msg, link);
2674c7d22538SKrzysztof Goreczny 	if (!STAILQ_EMPTY(&list->waiting_for_msg_reqs)) {
2675c7d22538SKrzysztof Goreczny 		tcp_req = STAILQ_FIRST(&list->waiting_for_msg_reqs);
2676c7d22538SKrzysztof Goreczny 		STAILQ_REMOVE_HEAD(&list->waiting_for_msg_reqs, control_msg_link);
2677c7d22538SKrzysztof Goreczny 		ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
2678c7d22538SKrzysztof Goreczny 					      struct spdk_nvmf_tcp_transport, transport);
2679c7d22538SKrzysztof Goreczny 		nvmf_tcp_req_process(ttransport, tcp_req);
2680c7d22538SKrzysztof Goreczny 	}
268185fa4324SAlexey Marchuk }
268285fa4324SAlexey Marchuk 
/*
 * Parse the SGL descriptor of a freshly received command capsule and attach
 * data buffers to the request. Two SGL forms are supported:
 *  - transport data block: host data arrives via C2H/H2C transfers; buffers
 *    come from the shared transport pool (or zero-copy when possible), and
 *    the request moves to TCP_REQUEST_STATE_HAVE_BUFFER (or stays queued if
 *    no buffer is available yet);
 *  - in-capsule data: the payload follows the command in the same PDU and is
 *    placed in the request's pre-allocated buffer or, for oversized admin /
 *    fabrics commands, a dedicated control-message buffer.
 * Any validation failure terminates the connection with a C2H term req.
 */
static void
nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
		       struct spdk_nvmf_transport *transport,
		       struct spdk_nvmf_transport_poll_group *group)
{
	struct spdk_nvmf_request		*req = &tcp_req->req;
	struct spdk_nvme_cmd			*cmd;
	struct spdk_nvme_sgl_descriptor		*sgl;
	struct spdk_nvmf_tcp_poll_group		*tgroup;
	enum spdk_nvme_tcp_term_req_fes		fes;
	struct nvme_tcp_pdu			*pdu;
	struct spdk_nvmf_tcp_qpair		*tqpair;
	uint32_t				length, error_offset = 0;

	cmd = &req->cmd->nvme_cmd;
	sgl = &cmd->dptr.sgl1;

	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
		/* get request length from sgl */
		length = sgl->unkeyed.length;
		if (spdk_unlikely(length > transport->opts.max_io_size)) {
			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
				    length, transport->opts.max_io_size);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
			goto fatal_err;
		}

		/* fill request length and populate iovs */
		req->length = length;

		SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 0x%x\n", length);

		if (spdk_unlikely(req->dif_enabled)) {
			/* With DIF, buffers must be sized for data plus interleaved metadata. */
			req->dif.orig_length = length;
			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
			req->dif.elba_length = length;
		}

		if (nvmf_ctrlr_use_zcopy(req)) {
			SPDK_DEBUGLOG(nvmf_tcp, "Using zero-copy to execute request %p\n", tcp_req);
			req->data_from_pool = false;
			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER);
			return;
		}

		if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
			/* No available buffers. Queue this request up. */
			SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n",
				      tcp_req);
			return;
		}

		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER);
		SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n",
			      tcp_req, req->iovcnt, req->iov[0].iov_base);

		return;
	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		uint64_t offset = sgl->address;
		uint32_t max_len = transport->opts.in_capsule_data_size;

		assert(tcp_req->has_in_capsule_data);
		/* Capsule Cmd with In-capsule Data should get data length from pdu header */
		tqpair = tcp_req->pdu->qpair;
		/* receiving pdu is not same with the pdu in tcp_req */
		pdu = tqpair->pdu_in_progress;
		length = pdu->hdr.common.plen - pdu->psh_len - sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
		if (tqpair->host_ddgst_enable) {
			length -= SPDK_NVME_TCP_DIGEST_LEN;
		}
		/* This error is not defined in NVMe/TCP spec, take this error as fatal error */
		if (spdk_unlikely(length != sgl->unkeyed.length)) {
			SPDK_ERRLOG("In-Capsule Data length 0x%x is not equal to SGL data length 0x%x\n",
				    length, sgl->unkeyed.length);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
			goto fatal_err;
		}

		SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
			      offset, length);

		/* The NVMe/TCP transport does not use ICDOFF to control the in-capsule data offset. ICDOFF should be '0' */
		if (spdk_unlikely(offset != 0)) {
			/* Not defined fatal error in NVMe/TCP spec, handle this error as a fatal error */
			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " should be ZERO in NVMe/TCP\n", offset);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
			error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.address);
			goto fatal_err;
		}

		if (spdk_unlikely(length > max_len)) {
			/* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */
			if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE &&
			    (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) {

				/* Get a buffer from dedicated list */
				SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n");
				tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
				assert(tgroup->control_msg_list);
				req->iov[0].iov_base = nvmf_tcp_control_msg_get(tgroup->control_msg_list, tcp_req);
				if (!req->iov[0].iov_base) {
					/* No available buffers. Queue this request up. */
					SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req);
					/* Pause reception until a control msg buffer frees up. */
					nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF);
					return;
				}
			} else {
				SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
					    length, max_len);
				fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
				goto fatal_err;
			}
		} else {
			/* Payload fits in the request's pre-allocated in-capsule buffer. */
			req->iov[0].iov_base = tcp_req->buf;
		}

		req->length = length;
		req->data_from_pool = false;

		if (spdk_unlikely(req->dif_enabled)) {
			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
			req->dif.elba_length = length;
		}

		req->iov[0].iov_len = length;
		req->iovcnt = 1;
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER);

		return;
	}
	/* If we want to handle the problem here, then we can't skip the following data segment.
	 * Because this function runs before reading data part, now handle all errors as fatal errors. */
	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
		    sgl->generic.type, sgl->generic.subtype);
	fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
	error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.generic);
fatal_err:
	nvmf_tcp_send_c2h_term_req(tcp_req->pdu->qpair, tcp_req->pdu, fes, error_offset);
}
2825e956be96SZiye Yang 
282634a0d851SShuhei Matsumoto static inline enum spdk_nvme_media_error_status_code
282734a0d851SShuhei Matsumoto nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
282834a0d851SShuhei Matsumoto 	enum spdk_nvme_media_error_status_code result;
28298448adaeSShuhei Matsumoto 
283034a0d851SShuhei Matsumoto 	switch (err_type)
283134a0d851SShuhei Matsumoto 	{
283234a0d851SShuhei Matsumoto 	case SPDK_DIF_REFTAG_ERROR:
283334a0d851SShuhei Matsumoto 		result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
283434a0d851SShuhei Matsumoto 		break;
283534a0d851SShuhei Matsumoto 	case SPDK_DIF_APPTAG_ERROR:
283634a0d851SShuhei Matsumoto 		result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
283734a0d851SShuhei Matsumoto 		break;
283834a0d851SShuhei Matsumoto 	case SPDK_DIF_GUARD_ERROR:
283934a0d851SShuhei Matsumoto 		result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
284034a0d851SShuhei Matsumoto 		break;
284134a0d851SShuhei Matsumoto 	default:
284234a0d851SShuhei Matsumoto 		SPDK_UNREACHABLE();
284334a0d851SShuhei Matsumoto 		break;
28448448adaeSShuhei Matsumoto 	}
28458448adaeSShuhei Matsumoto 
284634a0d851SShuhei Matsumoto 	return result;
28478448adaeSShuhei Matsumoto }
28488448adaeSShuhei Matsumoto 
/* Build and queue a C2H DATA PDU carrying the next portion of a read
 * request's payload, starting at tcp_req->pdu->rw_offset.  Handles
 * header/data digest flags, CPDA padding, the optional "success
 * optimization" (eliding the capsule response), and DIF verification of
 * the outgoing data.  On a DIF error, sends a capsule response with a
 * media error status instead of the data. */
static void
_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
			struct spdk_nvmf_tcp_req *tcp_req)
{
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(
				tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
	uint32_t plen, pdo, alignment;
	int rc;

	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");

	rsp_pdu = tcp_req->pdu;
	assert(rsp_pdu != NULL);

	c2h_data = &rsp_pdu->hdr.c2h_data;
	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
	plen = c2h_data->common.hlen = sizeof(*c2h_data);

	/* Header digest, if negotiated, is counted in plen and flagged in the common header. */
	if (tqpair->host_hdgst_enable) {
		plen += SPDK_NVME_TCP_DIGEST_LEN;
		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
	}

	/* set the psh: rw_offset tracks how much of req.length was already sent,
	 * so datao/datal describe the remaining tail of the payload. */
	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
	c2h_data->datal = tcp_req->req.length - tcp_req->pdu->rw_offset;
	c2h_data->datao = tcp_req->pdu->rw_offset;

	/* set the padding: round the data offset (pdo) up to the CPDA-negotiated
	 * alignment, (cpda + 1) * 4 bytes. */
	rsp_pdu->padding_len = 0;
	pdo = plen;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (plen % alignment != 0) {
			pdo = (plen + alignment) / alignment * alignment;
			rsp_pdu->padding_len = pdo - plen;
			plen = pdo;
		}
	}

	c2h_data->common.pdo = pdo;
	plen += c2h_data->datal;
	if (tqpair->host_ddgst_enable) {
		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	c2h_data->common.plen = plen;

	if (spdk_unlikely(tcp_req->req.dif_enabled)) {
		rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
	}

	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
				  c2h_data->datao, c2h_data->datal);


	c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
	/* Need to send the capsule response if response is not all 0.
	 * Otherwise (success optimization enabled, cdw0/cdw1 zero) the SUCCESS
	 * flag lets the host skip waiting for a separate capsule response. */
	if (ttransport->tcp_opts.c2h_success &&
	    tcp_req->rsp.cdw0 == 0 && tcp_req->rsp.cdw1 == 0) {
		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
	}

	if (spdk_unlikely(tcp_req->req.dif_enabled)) {
		struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
		struct spdk_dif_error err_blk = {};
		uint32_t mapped_length = 0;
		uint32_t available_iovs = SPDK_COUNTOF(rsp_pdu->iov);
		uint32_t ddgst_len = 0;

		if (tqpair->host_ddgst_enable) {
			/* Data digest consumes additional iov entry */
			available_iovs--;
			/* plen needs to be updated since nvme_tcp_build_iovs compares expected and actual plen */
			ddgst_len = SPDK_NVME_TCP_DIGEST_LEN;
			c2h_data->common.plen -= ddgst_len;
		}
		/* Temp call to estimate if data can be described by limited number of iovs.
		 * iov vector will be rebuilt in nvmf_tcp_qpair_write_pdu */
		nvme_tcp_build_iovs(rsp_pdu->iov, available_iovs, rsp_pdu, tqpair->host_hdgst_enable,
				    false, &mapped_length);

		if (mapped_length != c2h_data->common.plen) {
			/* Not all data fits in the available iovs: shrink this PDU to what
			 * was mapped; the remainder goes out in a follow-up C2H PDU, so
			 * LAST_PDU/SUCCESS must be cleared below. */
			c2h_data->datal = mapped_length - (c2h_data->common.plen - c2h_data->datal);
			SPDK_DEBUGLOG(nvmf_tcp,
				      "Part C2H, data_len %u (of %u), PDU len %u, updated PDU len %u, offset %u\n",
				      c2h_data->datal, tcp_req->req.length, c2h_data->common.plen, mapped_length, rsp_pdu->rw_offset);
			c2h_data->common.plen = mapped_length;

			/* Rebuild pdu->data_iov since data length is changed */
			nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, c2h_data->datao,
						  c2h_data->datal);

			c2h_data->common.flags &= ~(SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU |
						    SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS);
		}

		/* Re-add the data digest length that was subtracted for the estimate above. */
		c2h_data->common.plen += ddgst_len;

		assert(rsp_pdu->rw_offset <= tcp_req->req.length);

		/* Verify DIF over the data about to be sent; on failure, report a
		 * media error via a capsule response instead of sending the data. */
		rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
					    0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
		if (rc != 0) {
			SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
				    err_blk.err_type, err_blk.err_offset);
			rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
			rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
			nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
			return;
		}
	}

	rsp_pdu->rw_offset += c2h_data->datal;
	nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_pdu_c2h_data_complete, tcp_req);
}
2968e956be96SZiye Yang 
/* Public entry point for starting a controller-to-host data transfer:
 * initialize the request's PDU, then build and queue the first C2H DATA
 * PDU via _nvmf_tcp_send_c2h_data(). */
static void
nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
		       struct spdk_nvmf_tcp_req *tcp_req)
{
	nvmf_tcp_req_pdu_init(tcp_req);
	_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
}
2976e956be96SZiye Yang 
2977e956be96SZiye Yang static int
2978e956be96SZiye Yang request_transfer_out(struct spdk_nvmf_request *req)
2979e956be96SZiye Yang {
2980c57bafedSBen Walker 	struct spdk_nvmf_tcp_req	*tcp_req;
2981e956be96SZiye Yang 	struct spdk_nvmf_qpair		*qpair;
29822b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair	*tqpair;
2983e956be96SZiye Yang 	struct spdk_nvme_cpl		*rsp;
2984e956be96SZiye Yang 
29852172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");
2986e956be96SZiye Yang 
2987e956be96SZiye Yang 	qpair = req->qpair;
2988e956be96SZiye Yang 	rsp = &req->rsp->nvme_cpl;
2989c57bafedSBen Walker 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2990e956be96SZiye Yang 
2991e956be96SZiye Yang 	/* Advance our sq_head pointer */
2992e956be96SZiye Yang 	if (qpair->sq_head == qpair->sq_head_max) {
2993e956be96SZiye Yang 		qpair->sq_head = 0;
2994e956be96SZiye Yang 	} else {
2995e956be96SZiye Yang 		qpair->sq_head++;
2996e956be96SZiye Yang 	}
2997e956be96SZiye Yang 	rsp->sqhd = qpair->sq_head;
2998e956be96SZiye Yang 
29992b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
30004de405abSSeth Howell 	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
3001a2adca79SBen Walker 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
30024de405abSSeth Howell 		nvmf_tcp_send_c2h_data(tqpair, tcp_req);
3003e956be96SZiye Yang 	} else {
30044de405abSSeth Howell 		nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
3005e956be96SZiye Yang 	}
3006e956be96SZiye Yang 
3007e956be96SZiye Yang 	return 0;
3008e956be96SZiye Yang }
3009e956be96SZiye Yang 
/* Enforce FUSED command ordering for a newly received request.  A valid
 * fused pair is a FUSE_FIRST command immediately followed by a
 * FUSE_SECOND on the same qpair; tqpair->fused_first remembers a pending
 * FIRST between calls.  Valid pairs are linked via ->fused_pair so they
 * can be executed back to back; any ordering violation marks the
 * offending request(s) with ->fused_failed. */
static void
nvmf_tcp_check_fused_ordering(struct spdk_nvmf_tcp_transport *ttransport,
			      struct spdk_nvmf_tcp_qpair *tqpair,
			      struct spdk_nvmf_tcp_req *tcp_req)
{
	enum spdk_nvme_cmd_fuse last, next;

	/* 'last' is the fuse state of the pending FIRST (if any); 'next' is this request's. */
	last = tqpair->fused_first ? tqpair->fused_first->cmd.fuse : SPDK_NVME_CMD_FUSE_NONE;
	next = tcp_req->cmd.fuse;

	/* A SECOND is never left pending in fused_first; it is either paired or failed below. */
	assert(last != SPDK_NVME_CMD_FUSE_SECOND);

	if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) {
		return;
	}

	if (last == SPDK_NVME_CMD_FUSE_FIRST) {
		if (next == SPDK_NVME_CMD_FUSE_SECOND) {
			/* This is a valid pair of fused commands.  Point them at each other
			 * so they can be submitted consecutively once ready to be executed.
			 */
			tqpair->fused_first->fused_pair = tcp_req;
			tcp_req->fused_pair = tqpair->fused_first;
			tqpair->fused_first = NULL;
			return;
		} else {
			/* Mark the last req as failed since it wasn't followed by a SECOND. */
			tqpair->fused_first->fused_failed = true;

			/*
			 * If the last req is in READY_TO_EXECUTE state, then call
			 * nvmf_tcp_req_process(), otherwise nothing else will kick it.
			 */
			if (tqpair->fused_first->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) {
				nvmf_tcp_req_process(ttransport, tqpair->fused_first);
			}

			tqpair->fused_first = NULL;
		}
	}

	if (next == SPDK_NVME_CMD_FUSE_FIRST) {
		/* Set tqpair->fused_first here so that we know to check that the next request
		 * is a SECOND (and to fail this one if it isn't).
		 */
		tqpair->fused_first = tcp_req;
	} else if (next == SPDK_NVME_CMD_FUSE_SECOND) {
		/* Mark this req failed since it is a SECOND and the last one was not a FIRST. */
		tcp_req->fused_failed = true;
	}
}
3061c81c10c5SJim Harris 
3062e956be96SZiye Yang static bool
30634de405abSSeth Howell nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
3064c57bafedSBen Walker 		     struct spdk_nvmf_tcp_req *tcp_req)
3065e956be96SZiye Yang {
30662b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair		*tqpair;
3067f8dd380bSMengjinWu 	uint32_t				plen;
3068f8dd380bSMengjinWu 	struct nvme_tcp_pdu			*pdu;
3069e956be96SZiye Yang 	enum spdk_nvmf_tcp_req_state		prev_state;
3070e956be96SZiye Yang 	bool					progress = false;
3071c8734543SShuhei Matsumoto 	struct spdk_nvmf_transport		*transport = &ttransport->transport;
30723c88819bSZiye Yang 	struct spdk_nvmf_transport_poll_group	*group;
307385fa4324SAlexey Marchuk 	struct spdk_nvmf_tcp_poll_group		*tgroup;
3074e956be96SZiye Yang 
30752b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
307663de221bSBen Walker 	group = &tqpair->group->group;
3077e956be96SZiye Yang 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
3078e956be96SZiye Yang 
3079f0be9913SSeth Howell 	/* If the qpair is not active, we need to abort the outstanding requests. */
30803caf2080SKonrad Sztyber 	if (!spdk_nvmf_qpair_is_active(&tqpair->qpair)) {
3081f0be9913SSeth Howell 		if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
3082c7d22538SKrzysztof Goreczny 			nvmf_tcp_request_get_buffers_abort(tcp_req);
3083f0be9913SSeth Howell 		}
3084f0be9913SSeth Howell 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
3085f0be9913SSeth Howell 	}
3086f0be9913SSeth Howell 
3087e956be96SZiye Yang 	/* The loop here is to allow for several back-to-back state changes. */
3088e956be96SZiye Yang 	do {
3089e956be96SZiye Yang 		prev_state = tcp_req->state;
3090e956be96SZiye Yang 
30912172c432STomasz Zawadzki 		SPDK_DEBUGLOG(nvmf_tcp, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
3092e956be96SZiye Yang 			      tqpair);
3093e956be96SZiye Yang 
3094e956be96SZiye Yang 		switch (tcp_req->state) {
3095e956be96SZiye Yang 		case TCP_REQUEST_STATE_FREE:
3096e956be96SZiye Yang 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
3097e956be96SZiye Yang 			 * to escape this state. */
3098e956be96SZiye Yang 			break;
3099e956be96SZiye Yang 		case TCP_REQUEST_STATE_NEW:
3100b68ae4fbSAtul Malakar 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req,
3101b68ae4fbSAtul Malakar 					  tqpair->qpair.queue_depth);
3102e956be96SZiye Yang 
3103e956be96SZiye Yang 			/* copy the cmd from the receive pdu */
3104f8ac678eSZiye Yang 			tcp_req->cmd = tqpair->pdu_in_progress->hdr.capsule_cmd.ccsqe;
3105e956be96SZiye Yang 
3106fcd652f5SAlexey Marchuk 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
310715ae31fbSBen Walker 				tcp_req->req.dif_enabled = true;
3108f8ac678eSZiye Yang 				tqpair->pdu_in_progress->dif_ctx = &tcp_req->req.dif.dif_ctx;
31092c9b0af2SShuhei Matsumoto 			}
31102c9b0af2SShuhei Matsumoto 
3111c81c10c5SJim Harris 			nvmf_tcp_check_fused_ordering(ttransport, tqpair, tcp_req);
3112c81c10c5SJim Harris 
3113e956be96SZiye Yang 			/* The next state transition depends on the data transfer needs of this request. */
3114bc13d022SChangpeng Liu 			tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req);
3115e956be96SZiye Yang 
3116864d93c0SAlexey Marchuk 			if (spdk_unlikely(tcp_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) {
31172a07b467SKonrad Sztyber 				nvmf_tcp_req_set_cpl(tcp_req, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_INVALID_OPCODE);
3118f7b9f80bSmatthewb 				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
3119864d93c0SAlexey Marchuk 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
31202172c432STomasz Zawadzki 				SPDK_DEBUGLOG(nvmf_tcp, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", tcp_req);
3121864d93c0SAlexey Marchuk 				break;
3122864d93c0SAlexey Marchuk 			}
3123864d93c0SAlexey Marchuk 
3124e956be96SZiye Yang 			/* If no data to transfer, ready to execute. */
3125e956be96SZiye Yang 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
3126cc6920a4SJosh Soref 				/* Reset the tqpair receiving pdu state */
31274de405abSSeth Howell 				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
31284de405abSSeth Howell 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
3129e956be96SZiye Yang 				break;
3130e956be96SZiye Yang 			}
3131e956be96SZiye Yang 
3132f8dd380bSMengjinWu 			pdu = tqpair->pdu_in_progress;
3133f8dd380bSMengjinWu 			plen = pdu->hdr.common.hlen;
3134f8dd380bSMengjinWu 			if (tqpair->host_hdgst_enable) {
3135f8dd380bSMengjinWu 				plen += SPDK_NVME_TCP_DIGEST_LEN;
3136f8dd380bSMengjinWu 			}
3137f8dd380bSMengjinWu 			if (pdu->hdr.common.plen != plen) {
3138f8dd380bSMengjinWu 				tcp_req->has_in_capsule_data = true;
3139f8dd380bSMengjinWu 			} else {
3140f8dd380bSMengjinWu 				/* Data is transmitted by C2H PDUs */
31414de405abSSeth Howell 				nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
314240872802SZiye Yang 			}
314340872802SZiye Yang 
31444de405abSSeth Howell 			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
3145e956be96SZiye Yang 			break;
3146e956be96SZiye Yang 		case TCP_REQUEST_STATE_NEED_BUFFER:
3147ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, tqpair->qpair.trace_id, 0,
3148ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
3149e956be96SZiye Yang 
3150e956be96SZiye Yang 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
3151e956be96SZiye Yang 
3152e956be96SZiye Yang 			/* Try to get a data buffer */
3153c7d22538SKrzysztof Goreczny 			nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
3154e956be96SZiye Yang 			break;
3155c7d22538SKrzysztof Goreczny 		case TCP_REQUEST_STATE_HAVE_BUFFER:
3156c7d22538SKrzysztof Goreczny 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_HAVE_BUFFER, tqpair->qpair.trace_id, 0,
3157c7d22538SKrzysztof Goreczny 					  (uintptr_t)tcp_req);
3158794d47d4SKonrad Sztyber 			/* Get a zcopy buffer if the request can be serviced through zcopy */
3159794d47d4SKonrad Sztyber 			if (spdk_nvmf_request_using_zcopy(&tcp_req->req)) {
3160794d47d4SKonrad Sztyber 				if (spdk_unlikely(tcp_req->req.dif_enabled)) {
3161794d47d4SKonrad Sztyber 					assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
3162794d47d4SKonrad Sztyber 					tcp_req->req.length = tcp_req->req.dif.elba_length;
3163794d47d4SKonrad Sztyber 				}
3164794d47d4SKonrad Sztyber 
3165794d47d4SKonrad Sztyber 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_START);
3166794d47d4SKonrad Sztyber 				spdk_nvmf_request_zcopy_start(&tcp_req->req);
3167794d47d4SKonrad Sztyber 				break;
3168794d47d4SKonrad Sztyber 			}
3169794d47d4SKonrad Sztyber 
3170c7d22538SKrzysztof Goreczny 			assert(tcp_req->req.iovcnt > 0);
3171e956be96SZiye Yang 
3172e956be96SZiye Yang 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
3173e956be96SZiye Yang 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
3174033ef363SBen Walker 				if (tcp_req->req.data_from_pool) {
31752172c432STomasz Zawadzki 					SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
31764de405abSSeth Howell 					nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
3177033ef363SBen Walker 				} else {
3178033ef363SBen Walker 					struct nvme_tcp_pdu *pdu;
3179033ef363SBen Walker 
31804de405abSSeth Howell 					nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
318148a547fdSBen Walker 
3182f8ac678eSZiye Yang 					pdu = tqpair->pdu_in_progress;
31832172c432STomasz Zawadzki 					SPDK_DEBUGLOG(nvmf_tcp, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
3184033ef363SBen Walker 						      tqpair);
3185033ef363SBen Walker 					/* No need to send r2t, contained in the capsuled data */
3186033ef363SBen Walker 					nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
3187033ef363SBen Walker 								  0, tcp_req->req.length);
31884de405abSSeth Howell 					nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
3189033ef363SBen Walker 				}
3190e956be96SZiye Yang 				break;
3191e956be96SZiye Yang 			}
3192e956be96SZiye Yang 
31934de405abSSeth Howell 			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
3194e956be96SZiye Yang 			break;
3195794d47d4SKonrad Sztyber 		case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
3196ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START, tqpair->qpair.trace_id, 0,
3197ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
3198794d47d4SKonrad Sztyber 			/* Some external code must kick a request into  TCP_REQUEST_STATE_ZCOPY_START_COMPLETED
3199794d47d4SKonrad Sztyber 			 * to escape this state. */
3200794d47d4SKonrad Sztyber 			break;
3201794d47d4SKonrad Sztyber 		case TCP_REQUEST_STATE_ZCOPY_START_COMPLETED:
3202ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED, tqpair->qpair.trace_id, 0,
3203ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
3204794d47d4SKonrad Sztyber 			if (spdk_unlikely(spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl))) {
3205794d47d4SKonrad Sztyber 				SPDK_DEBUGLOG(nvmf_tcp, "Zero-copy start failed for tcp_req(%p) on tqpair=%p\n",
3206794d47d4SKonrad Sztyber 					      tcp_req, tqpair);
3207794d47d4SKonrad Sztyber 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
3208794d47d4SKonrad Sztyber 				break;
3209794d47d4SKonrad Sztyber 			}
3210794d47d4SKonrad Sztyber 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
3211794d47d4SKonrad Sztyber 				SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
3212794d47d4SKonrad Sztyber 				nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
3213794d47d4SKonrad Sztyber 			} else {
3214794d47d4SKonrad Sztyber 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
3215794d47d4SKonrad Sztyber 			}
3216794d47d4SKonrad Sztyber 			break;
321748a547fdSBen Walker 		case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
3218ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, tqpair->qpair.trace_id, 0,
3219ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
322048a547fdSBen Walker 			/* The R2T completion or the h2c data incoming will kick it out of this state. */
322148a547fdSBen Walker 			break;
3222e956be96SZiye Yang 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
322348a547fdSBen Walker 
3224ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, tqpair->qpair.trace_id,
3225ea1a6608SJim Harris 					  0, (uintptr_t)tcp_req);
3226e956be96SZiye Yang 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
3227e956be96SZiye Yang 			 * to escape this state. */
3228e956be96SZiye Yang 			break;
3229e956be96SZiye Yang 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
3230ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, tqpair->qpair.trace_id, 0,
3231ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
32327bfbc388SShuhei Matsumoto 
323315ae31fbSBen Walker 			if (spdk_unlikely(tcp_req->req.dif_enabled)) {
3234fcd652f5SAlexey Marchuk 				assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
3235fcd652f5SAlexey Marchuk 				tcp_req->req.length = tcp_req->req.dif.elba_length;
32367bfbc388SShuhei Matsumoto 			}
32377bfbc388SShuhei Matsumoto 
3238c81c10c5SJim Harris 			if (tcp_req->cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
3239c81c10c5SJim Harris 				if (tcp_req->fused_failed) {
3240c81c10c5SJim Harris 					/* This request failed FUSED semantics.  Fail it immediately, without
3241c81c10c5SJim Harris 					 * even sending it to the target layer.
3242c81c10c5SJim Harris 					 */
32432a07b467SKonrad Sztyber 					nvmf_tcp_req_set_cpl(tcp_req, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_MISSING_FUSED);
3244c81c10c5SJim Harris 					nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
3245c81c10c5SJim Harris 					break;
3246c81c10c5SJim Harris 				}
3247c81c10c5SJim Harris 
3248c81c10c5SJim Harris 				if (tcp_req->fused_pair == NULL ||
3249c81c10c5SJim Harris 				    tcp_req->fused_pair->state != TCP_REQUEST_STATE_READY_TO_EXECUTE) {
3250c81c10c5SJim Harris 					/* This request is ready to execute, but either we don't know yet if it's
3251c81c10c5SJim Harris 					 * valid - i.e. this is a FIRST but we haven't received the next request yet),
3252c81c10c5SJim Harris 					 * or the other request of this fused pair isn't ready to execute. So
3253c81c10c5SJim Harris 					 * break here and this request will get processed later either when the
3254c81c10c5SJim Harris 					 * other request is ready or we find that this request isn't valid.
3255c81c10c5SJim Harris 					 */
3256c81c10c5SJim Harris 					break;
3257c81c10c5SJim Harris 				}
3258c81c10c5SJim Harris 			}
3259c81c10c5SJim Harris 
3260794d47d4SKonrad Sztyber 			if (!spdk_nvmf_request_using_zcopy(&tcp_req->req)) {
32614de405abSSeth Howell 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
3262c81c10c5SJim Harris 				/* If we get to this point, and this request is a fused command, we know that
3263c81c10c5SJim Harris 				 * it is part of a valid sequence (FIRST followed by a SECOND) and that both
3264c81c10c5SJim Harris 				 * requests are READY_TO_EXECUTE.  So call spdk_nvmf_request_exec() both on this
3265c81c10c5SJim Harris 				 * request, and the other request of the fused pair, in the correct order.
3266c81c10c5SJim Harris 				 * Also clear the ->fused_pair pointers on both requests, since after this point
3267c81c10c5SJim Harris 				 * we no longer need to maintain the relationship between these two requests.
3268c81c10c5SJim Harris 				 */
3269c81c10c5SJim Harris 				if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) {
3270c81c10c5SJim Harris 					assert(tcp_req->fused_pair != NULL);
3271c81c10c5SJim Harris 					assert(tcp_req->fused_pair->fused_pair == tcp_req);
3272c81c10c5SJim Harris 					nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING);
3273c81c10c5SJim Harris 					spdk_nvmf_request_exec(&tcp_req->fused_pair->req);
3274c81c10c5SJim Harris 					tcp_req->fused_pair->fused_pair = NULL;
3275c81c10c5SJim Harris 					tcp_req->fused_pair = NULL;
3276c81c10c5SJim Harris 				}
3277e956be96SZiye Yang 				spdk_nvmf_request_exec(&tcp_req->req);
3278c81c10c5SJim Harris 				if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) {
3279c81c10c5SJim Harris 					assert(tcp_req->fused_pair != NULL);
3280c81c10c5SJim Harris 					assert(tcp_req->fused_pair->fused_pair == tcp_req);
3281c81c10c5SJim Harris 					nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING);
3282c81c10c5SJim Harris 					spdk_nvmf_request_exec(&tcp_req->fused_pair->req);
3283c81c10c5SJim Harris 					tcp_req->fused_pair->fused_pair = NULL;
3284c81c10c5SJim Harris 					tcp_req->fused_pair = NULL;
3285c81c10c5SJim Harris 				}
3286794d47d4SKonrad Sztyber 			} else {
3287794d47d4SKonrad Sztyber 				/* For zero-copy, only requests with data coming from host to the
3288794d47d4SKonrad Sztyber 				 * controller can end up here. */
3289794d47d4SKonrad Sztyber 				assert(tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
3290794d47d4SKonrad Sztyber 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT);
3291794d47d4SKonrad Sztyber 				spdk_nvmf_request_zcopy_end(&tcp_req->req, true);
3292794d47d4SKonrad Sztyber 			}
3293c81c10c5SJim Harris 
3294e956be96SZiye Yang 			break;
3295e956be96SZiye Yang 		case TCP_REQUEST_STATE_EXECUTING:
3296ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req);
3297e956be96SZiye Yang 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
3298e956be96SZiye Yang 			 * to escape this state. */
3299e956be96SZiye Yang 			break;
3300794d47d4SKonrad Sztyber 		case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
3301ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT, tqpair->qpair.trace_id, 0,
3302ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
3303794d47d4SKonrad Sztyber 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
3304794d47d4SKonrad Sztyber 			 * to escape this state. */
3305794d47d4SKonrad Sztyber 			break;
3306e956be96SZiye Yang 		case TCP_REQUEST_STATE_EXECUTED:
3307ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req);
33087bfbc388SShuhei Matsumoto 
330915ae31fbSBen Walker 			if (spdk_unlikely(tcp_req->req.dif_enabled)) {
3310fcd652f5SAlexey Marchuk 				tcp_req->req.length = tcp_req->req.dif.orig_length;
33117bfbc388SShuhei Matsumoto 			}
33127bfbc388SShuhei Matsumoto 
33134de405abSSeth Howell 			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
3314e956be96SZiye Yang 			break;
3315e956be96SZiye Yang 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
3316ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, tqpair->qpair.trace_id, 0,
3317ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
33188ed53eeeSMengjinWu 			if (request_transfer_out(&tcp_req->req) != 0) {
33198ed53eeeSMengjinWu 				assert(0); /* No good way to handle this currently */
33208ed53eeeSMengjinWu 			}
3321e956be96SZiye Yang 			break;
3322e956be96SZiye Yang 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
3323ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, tqpair->qpair.trace_id,
3324ea1a6608SJim Harris 					  0, (uintptr_t)tcp_req);
3325e956be96SZiye Yang 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
3326e956be96SZiye Yang 			 * to escape this state. */
3327e956be96SZiye Yang 			break;
3328794d47d4SKonrad Sztyber 		case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE:
3329ea1a6608SJim Harris 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE, tqpair->qpair.trace_id, 0,
3330ea1a6608SJim Harris 					  (uintptr_t)tcp_req);
3331794d47d4SKonrad Sztyber 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
3332794d47d4SKonrad Sztyber 			 * to escape this state. */
3333794d47d4SKonrad Sztyber 			break;
3334e956be96SZiye Yang 		case TCP_REQUEST_STATE_COMPLETED:
3335b68ae4fbSAtul Malakar 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req,
3336b68ae4fbSAtul Malakar 					  tqpair->qpair.queue_depth);
333772925e3dSKonrad Sztyber 			/* If there's an outstanding PDU sent to the host, the request is completed
333872925e3dSKonrad Sztyber 			 * due to the qpair being disconnected.  We must delay the completion until
333972925e3dSKonrad Sztyber 			 * that write is done to avoid freeing the request twice. */
334072925e3dSKonrad Sztyber 			if (spdk_unlikely(tcp_req->pdu_in_use)) {
334172925e3dSKonrad Sztyber 				SPDK_DEBUGLOG(nvmf_tcp, "Delaying completion due to outstanding "
334272925e3dSKonrad Sztyber 					      "write on req=%p\n", tcp_req);
334372925e3dSKonrad Sztyber 				/* This can only happen for zcopy requests */
334472925e3dSKonrad Sztyber 				assert(spdk_nvmf_request_using_zcopy(&tcp_req->req));
33453caf2080SKonrad Sztyber 				assert(!spdk_nvmf_qpair_is_active(&tqpair->qpair));
334672925e3dSKonrad Sztyber 				break;
334772925e3dSKonrad Sztyber 			}
334872925e3dSKonrad Sztyber 
3349005b053aSShuhei Matsumoto 			if (tcp_req->req.data_from_pool) {
3350c8734543SShuhei Matsumoto 				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
3351cc6920a4SJosh Soref 			} else if (spdk_unlikely(tcp_req->has_in_capsule_data &&
3352cc6920a4SJosh Soref 						 (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC ||
335385fa4324SAlexey Marchuk 						  tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) {
335485fa4324SAlexey Marchuk 				tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
335585fa4324SAlexey Marchuk 				assert(tgroup->control_msg_list);
335685fa4324SAlexey Marchuk 				SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n");
3357fd05a2ffSJohn Levon 				nvmf_tcp_control_msg_put(tgroup->control_msg_list,
3358fd05a2ffSJohn Levon 							 tcp_req->req.iov[0].iov_base);
3359794d47d4SKonrad Sztyber 			} else if (tcp_req->req.zcopy_bdev_io != NULL) {
3360794d47d4SKonrad Sztyber 				/* If the request has an unreleased zcopy bdev_io, it's either a
336137dc93b9SKonrad Sztyber 				 * read, a failed write, or the qpair is being disconnected */
3362794d47d4SKonrad Sztyber 				assert(spdk_nvmf_request_using_zcopy(&tcp_req->req));
3363794d47d4SKonrad Sztyber 				assert(tcp_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
336437dc93b9SKonrad Sztyber 				       spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl) ||
33653caf2080SKonrad Sztyber 				       !spdk_nvmf_qpair_is_active(&tqpair->qpair));
3366794d47d4SKonrad Sztyber 				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE);
3367794d47d4SKonrad Sztyber 				spdk_nvmf_request_zcopy_end(&tcp_req->req, false);
3368794d47d4SKonrad Sztyber 				break;
3369e956be96SZiye Yang 			}
3370e956be96SZiye Yang 			tcp_req->req.length = 0;
3371e956be96SZiye Yang 			tcp_req->req.iovcnt = 0;
3372c81c10c5SJim Harris 			tcp_req->fused_failed = false;
3373c81c10c5SJim Harris 			if (tcp_req->fused_pair) {
3374c81c10c5SJim Harris 				/* This req was part of a valid fused pair, but failed before it got to
3375c81c10c5SJim Harris 				 * READ_TO_EXECUTE state.  This means we need to fail the other request
3376c81c10c5SJim Harris 				 * in the pair, because it is no longer part of a valid pair.  If the pair
3377c81c10c5SJim Harris 				 * already reached READY_TO_EXECUTE state, we need to kick it.
3378c81c10c5SJim Harris 				 */
3379c81c10c5SJim Harris 				tcp_req->fused_pair->fused_failed = true;
3380c81c10c5SJim Harris 				if (tcp_req->fused_pair->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) {
3381c81c10c5SJim Harris 					nvmf_tcp_req_process(ttransport, tcp_req->fused_pair);
3382c81c10c5SJim Harris 				}
3383c81c10c5SJim Harris 				tcp_req->fused_pair = NULL;
3384c81c10c5SJim Harris 			}
3385a2adca79SBen Walker 
338660af3c00SRui Chang 			nvmf_tcp_req_put(tqpair, tcp_req);
3387e956be96SZiye Yang 			break;
3388e956be96SZiye Yang 		case TCP_REQUEST_NUM_STATES:
3389e956be96SZiye Yang 		default:
3390e956be96SZiye Yang 			assert(0);
3391e956be96SZiye Yang 			break;
3392e956be96SZiye Yang 		}
3393e956be96SZiye Yang 
3394e956be96SZiye Yang 		if (tcp_req->state != prev_state) {
3395e956be96SZiye Yang 			progress = true;
3396e956be96SZiye Yang 		}
3397e956be96SZiye Yang 	} while (tcp_req->state != prev_state);
3398e956be96SZiye Yang 
3399e956be96SZiye Yang 	return progress;
3400e956be96SZiye Yang }
34011917d3b4SZiye Yang 
3402e956be96SZiye Yang static void
340311595300SJim Harris nvmf_tcp_qpair_process(struct spdk_nvmf_tcp_qpair *tqpair)
3404e956be96SZiye Yang {
3405e956be96SZiye Yang 	int rc;
3406e956be96SZiye Yang 
3407e956be96SZiye Yang 	assert(tqpair != NULL);
34084de405abSSeth Howell 	rc = nvmf_tcp_sock_process(tqpair);
34099dd9addaSZiye Yang 
3410053fa66bSBen Walker 	/* If there was a new socket error, disconnect */
3411053fa66bSBen Walker 	if (rc < 0) {
34124de405abSSeth Howell 		nvmf_tcp_qpair_disconnect(tqpair);
3413e956be96SZiye Yang 	}
3414e956be96SZiye Yang }
3415e956be96SZiye Yang 
341681fc34dfSKrzysztof Goreczny static void
3417*b8c964e2SJim Harris nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
341811595300SJim Harris {
341911595300SJim Harris 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
342011595300SJim Harris 
342111595300SJim Harris 	nvmf_tcp_qpair_process(tqpair);
342211595300SJim Harris }
342311595300SJim Harris 
3424e956be96SZiye Yang static int
34254de405abSSeth Howell nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
3426e956be96SZiye Yang 			struct spdk_nvmf_qpair *qpair)
3427e956be96SZiye Yang {
3428e956be96SZiye Yang 	struct spdk_nvmf_tcp_poll_group	*tgroup;
34292b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair	*tqpair;
3430e956be96SZiye Yang 	int				rc;
3431e956be96SZiye Yang 
3432e956be96SZiye Yang 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
34332b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
3434e956be96SZiye Yang 
34354de405abSSeth Howell 	rc =  nvmf_tcp_qpair_sock_init(tqpair);
3436e956be96SZiye Yang 	if (rc != 0) {
3437e956be96SZiye Yang 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
3438e956be96SZiye Yang 		return -1;
3439e956be96SZiye Yang 	}
3440e956be96SZiye Yang 
34414de405abSSeth Howell 	rc = nvmf_tcp_qpair_init(&tqpair->qpair);
3442e956be96SZiye Yang 	if (rc < 0) {
3443e956be96SZiye Yang 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
3444e956be96SZiye Yang 		return -1;
3445e956be96SZiye Yang 	}
3446e956be96SZiye Yang 
34474de405abSSeth Howell 	rc = nvmf_tcp_qpair_init_mem_resource(tqpair);
3448e956be96SZiye Yang 	if (rc < 0) {
3449e956be96SZiye Yang 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
3450e956be96SZiye Yang 		return -1;
3451e956be96SZiye Yang 	}
3452e956be96SZiye Yang 
34532bd41c51SChangpeng Liu 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
34542bd41c51SChangpeng Liu 				      nvmf_tcp_sock_cb, tqpair);
34552bd41c51SChangpeng Liu 	if (rc != 0) {
34562bd41c51SChangpeng Liu 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
34572bd41c51SChangpeng Liu 			    spdk_strerror(errno), errno);
34582bd41c51SChangpeng Liu 		return -1;
34592bd41c51SChangpeng Liu 	}
34602bd41c51SChangpeng Liu 
346163de221bSBen Walker 	tqpair->group = tgroup;
3462b95aae63SKonrad Sztyber 	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_INVALID);
3463e956be96SZiye Yang 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
3464e956be96SZiye Yang 
3465e956be96SZiye Yang 	return 0;
3466e956be96SZiye Yang }
3467e956be96SZiye Yang 
3468e956be96SZiye Yang static int
34694de405abSSeth Howell nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
3470527c825cSZiye Yang 			   struct spdk_nvmf_qpair *qpair)
3471527c825cSZiye Yang {
3472527c825cSZiye Yang 	struct spdk_nvmf_tcp_poll_group	*tgroup;
34732b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair		*tqpair;
3474527c825cSZiye Yang 	int				rc;
3475527c825cSZiye Yang 
3476527c825cSZiye Yang 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
34772b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
3478527c825cSZiye Yang 
347963de221bSBen Walker 	assert(tqpair->group == tgroup);
348063de221bSBen Walker 
34812172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
3482bf102998SZiye Yang 	if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
34838cd4cc63SArtur Paszkiewicz 		/* Change the state to move the qpair from the await_req list to the main list
34848cd4cc63SArtur Paszkiewicz 		 * and prevent adding it again later by nvmf_tcp_qpair_set_recv_state() */
34858cd4cc63SArtur Paszkiewicz 		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
3486bf102998SZiye Yang 	}
34878cd4cc63SArtur Paszkiewicz 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
3488f84c916cSBen Walker 
3489e53d15a2SKonrad Sztyber 	/* Try to force out any pending writes */
3490e53d15a2SKonrad Sztyber 	spdk_sock_flush(tqpair->sock);
3491e53d15a2SKonrad Sztyber 
3492527c825cSZiye Yang 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
3493527c825cSZiye Yang 	if (rc != 0) {
3494527c825cSZiye Yang 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
3495527c825cSZiye Yang 			    spdk_strerror(errno), errno);
3496527c825cSZiye Yang 	}
3497527c825cSZiye Yang 
3498527c825cSZiye Yang 	return rc;
3499527c825cSZiye Yang }
3500527c825cSZiye Yang 
3501527c825cSZiye Yang static int
35024de405abSSeth Howell nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
3503e956be96SZiye Yang {
3504e956be96SZiye Yang 	struct spdk_nvmf_tcp_transport *ttransport;
3505c57bafedSBen Walker 	struct spdk_nvmf_tcp_req *tcp_req;
3506e956be96SZiye Yang 
3507e956be96SZiye Yang 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
3508c57bafedSBen Walker 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
3509e956be96SZiye Yang 
3510794d47d4SKonrad Sztyber 	switch (tcp_req->state) {
3511794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_EXECUTING:
3512794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
35134de405abSSeth Howell 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
3514794d47d4SKonrad Sztyber 		break;
3515794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
3516794d47d4SKonrad Sztyber 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_ZCOPY_START_COMPLETED);
3517794d47d4SKonrad Sztyber 		break;
3518794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE:
3519794d47d4SKonrad Sztyber 		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
3520794d47d4SKonrad Sztyber 		break;
3521794d47d4SKonrad Sztyber 	default:
3522c69768bdSMarcin Spiewak 		SPDK_ERRLOG("Unexpected request state %d (cntlid:%d, qid:%d)\n",
3523c69768bdSMarcin Spiewak 			    tcp_req->state, req->qpair->ctrlr->cntlid, req->qpair->qid);
3524794d47d4SKonrad Sztyber 		assert(0 && "Unexpected request state");
3525794d47d4SKonrad Sztyber 		break;
3526794d47d4SKonrad Sztyber 	}
3527794d47d4SKonrad Sztyber 
35284de405abSSeth Howell 	nvmf_tcp_req_process(ttransport, tcp_req);
3529e956be96SZiye Yang 
3530e956be96SZiye Yang 	return 0;
3531e956be96SZiye Yang }
3532e956be96SZiye Yang 
3533e956be96SZiye Yang static void
3534ccd96eadSNaresh Gottumukkala nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair,
3535ccd96eadSNaresh Gottumukkala 		     spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
3536e956be96SZiye Yang {
3537053fa66bSBen Walker 	struct spdk_nvmf_tcp_qpair *tqpair;
3538053fa66bSBen Walker 
35392172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvmf_tcp, "Qpair: %p\n", qpair);
3540e956be96SZiye Yang 
3541053fa66bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
35423056c8acSKonrad Sztyber 
35433056c8acSKonrad Sztyber 	assert(tqpair->fini_cb_fn == NULL);
35443056c8acSKonrad Sztyber 	tqpair->fini_cb_fn = cb_fn;
35453056c8acSKonrad Sztyber 	tqpair->fini_cb_arg = cb_arg;
35463056c8acSKonrad Sztyber 
3547b95aae63SKonrad Sztyber 	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_EXITED);
35484de405abSSeth Howell 	nvmf_tcp_qpair_destroy(tqpair);
3549e956be96SZiye Yang }
3550e956be96SZiye Yang 
3551e956be96SZiye Yang static int
35524de405abSSeth Howell nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
3553e956be96SZiye Yang {
3554e956be96SZiye Yang 	struct spdk_nvmf_tcp_poll_group *tgroup;
355511595300SJim Harris 	int num_events;
3556e956be96SZiye Yang 
3557e956be96SZiye Yang 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
3558e956be96SZiye Yang 
3559*b8c964e2SJim Harris 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
3560e956be96SZiye Yang 		return 0;
3561e956be96SZiye Yang 	}
3562e956be96SZiye Yang 
3563956fd5e1SAlexey Marchuk 	num_events = spdk_sock_group_poll(tgroup->sock_group);
3564956fd5e1SAlexey Marchuk 	if (spdk_unlikely(num_events < 0)) {
3565e956be96SZiye Yang 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
3566e956be96SZiye Yang 	}
3567e956be96SZiye Yang 
3568ea7fd2a5SJim Harris 	return num_events;
3569e956be96SZiye Yang }
3570e956be96SZiye Yang 
3571e956be96SZiye Yang static int
35724de405abSSeth Howell nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
3573e956be96SZiye Yang 			struct spdk_nvme_transport_id *trid, bool peer)
3574e956be96SZiye Yang {
35752b59852bSBen Walker 	struct spdk_nvmf_tcp_qpair     *tqpair;
3576e956be96SZiye Yang 	uint16_t			port;
3577e956be96SZiye Yang 
35782b59852bSBen Walker 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
35797ed0904bSSeth Howell 	spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP);
3580e956be96SZiye Yang 
3581e956be96SZiye Yang 	if (peer) {
3582e956be96SZiye Yang 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
3583e956be96SZiye Yang 		port = tqpair->initiator_port;
3584e956be96SZiye Yang 	} else {
3585e956be96SZiye Yang 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
3586e956be96SZiye Yang 		port = tqpair->target_port;
3587e956be96SZiye Yang 	}
3588e956be96SZiye Yang 
3589e956be96SZiye Yang 	if (spdk_sock_is_ipv4(tqpair->sock)) {
3590e956be96SZiye Yang 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
35916b5f7648Sdongx.yi 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
3592e956be96SZiye Yang 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
3593e956be96SZiye Yang 	} else {
3594e956be96SZiye Yang 		return -1;
3595e956be96SZiye Yang 	}
3596e956be96SZiye Yang 
3597e956be96SZiye Yang 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
3598e956be96SZiye Yang 	return 0;
3599e956be96SZiye Yang }
3600e956be96SZiye Yang 
3601e956be96SZiye Yang static int
36024de405abSSeth Howell nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
3603e956be96SZiye Yang 			      struct spdk_nvme_transport_id *trid)
3604e956be96SZiye Yang {
36054de405abSSeth Howell 	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
3606e956be96SZiye Yang }
3607e956be96SZiye Yang 
3608e956be96SZiye Yang static int
36094de405abSSeth Howell nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
3610e956be96SZiye Yang 			     struct spdk_nvme_transport_id *trid)
3611e956be96SZiye Yang {
36124de405abSSeth Howell 	return nvmf_tcp_qpair_get_trid(qpair, trid, 1);
3613e956be96SZiye Yang }
3614e956be96SZiye Yang 
3615e956be96SZiye Yang static int
36164de405abSSeth Howell nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
3617e956be96SZiye Yang 			       struct spdk_nvme_transport_id *trid)
3618e956be96SZiye Yang {
36194de405abSSeth Howell 	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
3620e956be96SZiye Yang }
3621e956be96SZiye Yang 
3622604b4503SShuhei Matsumoto static void
362372eedc57SShuhei Matsumoto nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req,
362472eedc57SShuhei Matsumoto 			      struct spdk_nvmf_tcp_req *tcp_req_to_abort)
362572eedc57SShuhei Matsumoto {
36262a07b467SKonrad Sztyber 	nvmf_tcp_req_set_cpl(tcp_req_to_abort, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_BY_REQUEST);
362772eedc57SShuhei Matsumoto 	nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE);
362872eedc57SShuhei Matsumoto 
362972eedc57SShuhei Matsumoto 	req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
363072eedc57SShuhei Matsumoto }
363172eedc57SShuhei Matsumoto 
3632040ee27cSShuhei Matsumoto static int
3633040ee27cSShuhei Matsumoto _nvmf_tcp_qpair_abort_request(void *ctx)
3634604b4503SShuhei Matsumoto {
3635040ee27cSShuhei Matsumoto 	struct spdk_nvmf_request *req = ctx;
3636040ee27cSShuhei Matsumoto 	struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort,
3637040ee27cSShuhei Matsumoto 			struct spdk_nvmf_tcp_req, req);
3638040ee27cSShuhei Matsumoto 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
3639040ee27cSShuhei Matsumoto 					     struct spdk_nvmf_tcp_qpair, qpair);
36407474fd4bSKonrad Sztyber 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
36417474fd4bSKonrad Sztyber 			struct spdk_nvmf_tcp_transport, transport);
3642b8a87e6aSShuhei Matsumoto 	int rc;
3643b8a87e6aSShuhei Matsumoto 
3644040ee27cSShuhei Matsumoto 	spdk_poller_unregister(&req->poller);
3645b8a87e6aSShuhei Matsumoto 
3646b8a87e6aSShuhei Matsumoto 	switch (tcp_req_to_abort->state) {
3647b8a87e6aSShuhei Matsumoto 	case TCP_REQUEST_STATE_EXECUTING:
3648794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
3649794d47d4SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
365081437ff6SShuhei Matsumoto 		rc = nvmf_ctrlr_abort_request(req);
3651b8a87e6aSShuhei Matsumoto 		if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
3652040ee27cSShuhei Matsumoto 			return SPDK_POLLER_BUSY;
3653b8a87e6aSShuhei Matsumoto 		}
3654b8a87e6aSShuhei Matsumoto 		break;
365572eedc57SShuhei Matsumoto 
365672eedc57SShuhei Matsumoto 	case TCP_REQUEST_STATE_NEED_BUFFER:
3657c7d22538SKrzysztof Goreczny 		nvmf_tcp_request_get_buffers_abort(tcp_req_to_abort);
365872eedc57SShuhei Matsumoto 		nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
36597474fd4bSKonrad Sztyber 		nvmf_tcp_req_process(ttransport, tcp_req_to_abort);
366072eedc57SShuhei Matsumoto 		break;
366172eedc57SShuhei Matsumoto 
3662b12419a2SKonrad Sztyber 	case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
3663040ee27cSShuhei Matsumoto 	case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
3664040ee27cSShuhei Matsumoto 		if (spdk_get_ticks() < req->timeout_tsc) {
3665040ee27cSShuhei Matsumoto 			req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0);
3666040ee27cSShuhei Matsumoto 			return SPDK_POLLER_BUSY;
3667040ee27cSShuhei Matsumoto 		}
3668040ee27cSShuhei Matsumoto 		break;
3669040ee27cSShuhei Matsumoto 
3670b8a87e6aSShuhei Matsumoto 	default:
36717db282dcSKonrad Sztyber 		/* Requests in other states are either un-abortable (e.g.
36727db282dcSKonrad Sztyber 		 * TRANSFERRING_CONTROLLER_TO_HOST) or should never end up here, as they're
36737db282dcSKonrad Sztyber 		 * immediately transitioned to other states in nvmf_tcp_req_process() (e.g.
36747db282dcSKonrad Sztyber 		 * READY_TO_EXECUTE).  But it is fine to end up here, as we'll simply complete the
36757db282dcSKonrad Sztyber 		 * abort request with the bit0 of dword0 set (command not aborted).
36767db282dcSKonrad Sztyber 		 */
3677b8a87e6aSShuhei Matsumoto 		break;
3678b8a87e6aSShuhei Matsumoto 	}
3679b8a87e6aSShuhei Matsumoto 
3680604b4503SShuhei Matsumoto 	spdk_nvmf_request_complete(req);
3681040ee27cSShuhei Matsumoto 	return SPDK_POLLER_BUSY;
3682040ee27cSShuhei Matsumoto }
3683040ee27cSShuhei Matsumoto 
3684040ee27cSShuhei Matsumoto static void
3685040ee27cSShuhei Matsumoto nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
3686040ee27cSShuhei Matsumoto 			     struct spdk_nvmf_request *req)
3687040ee27cSShuhei Matsumoto {
3688040ee27cSShuhei Matsumoto 	struct spdk_nvmf_tcp_qpair *tqpair;
368926e0ef9aSShuhei Matsumoto 	struct spdk_nvmf_tcp_transport *ttransport;
369026e0ef9aSShuhei Matsumoto 	struct spdk_nvmf_transport *transport;
3691040ee27cSShuhei Matsumoto 	uint16_t cid;
3692040ee27cSShuhei Matsumoto 	uint32_t i;
3693040ee27cSShuhei Matsumoto 	struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL;
3694040ee27cSShuhei Matsumoto 
3695040ee27cSShuhei Matsumoto 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
369626e0ef9aSShuhei Matsumoto 	ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
369726e0ef9aSShuhei Matsumoto 	transport = &ttransport->transport;
369826e0ef9aSShuhei Matsumoto 
3699040ee27cSShuhei Matsumoto 	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
3700040ee27cSShuhei Matsumoto 
3701040ee27cSShuhei Matsumoto 	for (i = 0; i < tqpair->resource_count; i++) {
37020bee1bb6SAlexey Marchuk 		if (tqpair->reqs[i].state != TCP_REQUEST_STATE_FREE &&
37030bee1bb6SAlexey Marchuk 		    tqpair->reqs[i].req.cmd->nvme_cmd.cid == cid) {
3704040ee27cSShuhei Matsumoto 			tcp_req_to_abort = &tqpair->reqs[i];
3705040ee27cSShuhei Matsumoto 			break;
3706040ee27cSShuhei Matsumoto 		}
3707040ee27cSShuhei Matsumoto 	}
3708040ee27cSShuhei Matsumoto 
3709ea1a6608SJim Harris 	spdk_trace_record(TRACE_TCP_QP_ABORT_REQ, tqpair->qpair.trace_id, 0, (uintptr_t)req);
37108107587bSKrzysztof Karas 
3711040ee27cSShuhei Matsumoto 	if (tcp_req_to_abort == NULL) {
3712040ee27cSShuhei Matsumoto 		spdk_nvmf_request_complete(req);
3713040ee27cSShuhei Matsumoto 		return;
3714040ee27cSShuhei Matsumoto 	}
3715040ee27cSShuhei Matsumoto 
3716040ee27cSShuhei Matsumoto 	req->req_to_abort = &tcp_req_to_abort->req;
371726e0ef9aSShuhei Matsumoto 	req->timeout_tsc = spdk_get_ticks() +
371826e0ef9aSShuhei Matsumoto 			   transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
3719040ee27cSShuhei Matsumoto 	req->poller = NULL;
3720040ee27cSShuhei Matsumoto 
3721040ee27cSShuhei Matsumoto 	_nvmf_tcp_qpair_abort_request(req);
3722604b4503SShuhei Matsumoto }
3723604b4503SShuhei Matsumoto 
3724663243cbSKrzysztof Karas struct tcp_subsystem_add_host_opts {
3725663243cbSKrzysztof Karas 	char *psk;
3726663243cbSKrzysztof Karas };
3727663243cbSKrzysztof Karas 
3728663243cbSKrzysztof Karas static const struct spdk_json_object_decoder tcp_subsystem_add_host_opts_decoder[] = {
3729663243cbSKrzysztof Karas 	{"psk", offsetof(struct tcp_subsystem_add_host_opts, psk), spdk_json_decode_string, true},
3730663243cbSKrzysztof Karas };
3731663243cbSKrzysztof Karas 
3732663243cbSKrzysztof Karas static int
3733663243cbSKrzysztof Karas nvmf_tcp_subsystem_add_host(struct spdk_nvmf_transport *transport,
3734663243cbSKrzysztof Karas 			    const struct spdk_nvmf_subsystem *subsystem,
3735663243cbSKrzysztof Karas 			    const char *hostnqn,
3736663243cbSKrzysztof Karas 			    const struct spdk_json_val *transport_specific)
3737663243cbSKrzysztof Karas {
3738663243cbSKrzysztof Karas 	struct tcp_subsystem_add_host_opts opts;
3739663243cbSKrzysztof Karas 	struct spdk_nvmf_tcp_transport *ttransport;
3740cac5ed95SKonrad Sztyber 	struct tcp_psk_entry *tmp, *entry = NULL;
3741572a0c8aSKrzysztof Karas 	uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
374268745a9cSKonrad Sztyber 	char psk_interchange[SPDK_TLS_PSK_MAX_LEN + 1] = {};
3743572a0c8aSKrzysztof Karas 	uint8_t tls_cipher_suite;
3744663243cbSKrzysztof Karas 	int rc = 0;
3745572a0c8aSKrzysztof Karas 	uint8_t psk_retained_hash;
3746572a0c8aSKrzysztof Karas 	uint64_t psk_configured_size;
3747663243cbSKrzysztof Karas 
3748663243cbSKrzysztof Karas 	if (transport_specific == NULL) {
3749663243cbSKrzysztof Karas 		return 0;
3750663243cbSKrzysztof Karas 	}
3751663243cbSKrzysztof Karas 
3752663243cbSKrzysztof Karas 	assert(transport != NULL);
3753663243cbSKrzysztof Karas 	assert(subsystem != NULL);
3754663243cbSKrzysztof Karas 
3755663243cbSKrzysztof Karas 	memset(&opts, 0, sizeof(opts));
3756663243cbSKrzysztof Karas 
3757ee164e62SKonrad Sztyber 	/* Decode PSK (either name of a key or file path) */
3758663243cbSKrzysztof Karas 	if (spdk_json_decode_object_relaxed(transport_specific, tcp_subsystem_add_host_opts_decoder,
3759663243cbSKrzysztof Karas 					    SPDK_COUNTOF(tcp_subsystem_add_host_opts_decoder), &opts)) {
3760663243cbSKrzysztof Karas 		SPDK_ERRLOG("spdk_json_decode_object failed\n");
3761663243cbSKrzysztof Karas 		return -EINVAL;
3762663243cbSKrzysztof Karas 	}
3763663243cbSKrzysztof Karas 
3764663243cbSKrzysztof Karas 	if (opts.psk == NULL) {
3765663243cbSKrzysztof Karas 		return 0;
3766663243cbSKrzysztof Karas 	}
37679e0eacb3SKonrad Sztyber 
37689e0eacb3SKonrad Sztyber 	entry = calloc(1, sizeof(struct tcp_psk_entry));
37699e0eacb3SKonrad Sztyber 	if (entry == NULL) {
37709e0eacb3SKonrad Sztyber 		SPDK_ERRLOG("Unable to allocate memory for PSK entry!\n");
37719e0eacb3SKonrad Sztyber 		rc = -ENOMEM;
37729e0eacb3SKonrad Sztyber 		goto end;
37739e0eacb3SKonrad Sztyber 	}
37749e0eacb3SKonrad Sztyber 
3775ee164e62SKonrad Sztyber 	entry->key = spdk_keyring_get_key(opts.psk);
377611cc2256SKonrad Sztyber 	if (entry->key == NULL) {
377711cc2256SKonrad Sztyber 		SPDK_ERRLOG("Key '%s' does not exist\n", opts.psk);
377811cc2256SKonrad Sztyber 		rc = -EINVAL;
377911cc2256SKonrad Sztyber 		goto end;
378011cc2256SKonrad Sztyber 	}
378111cc2256SKonrad Sztyber 
3782ee164e62SKonrad Sztyber 	rc = spdk_key_get_key(entry->key, psk_interchange, SPDK_TLS_PSK_MAX_LEN);
3783ee164e62SKonrad Sztyber 	if (rc < 0) {
378434edd9f1SKamil Godzwon 		SPDK_ERRLOG("Failed to retrieve PSK '%s'\n", opts.psk);
3785ee164e62SKonrad Sztyber 		rc = -EINVAL;
3786ee164e62SKonrad Sztyber 		goto end;
3787ee164e62SKonrad Sztyber 	}
3788ee164e62SKonrad Sztyber 
3789572a0c8aSKrzysztof Karas 	/* Parse PSK interchange to get length of base64 encoded data.
3790572a0c8aSKrzysztof Karas 	 * This is then used to decide which cipher suite should be used
3791572a0c8aSKrzysztof Karas 	 * to generate PSK identity and TLS PSK later on. */
379203b6183aSKrzysztof Karas 	rc = nvme_tcp_parse_interchange_psk(psk_interchange, psk_configured, sizeof(psk_configured),
3793572a0c8aSKrzysztof Karas 					    &psk_configured_size, &psk_retained_hash);
3794572a0c8aSKrzysztof Karas 	if (rc < 0) {
3795572a0c8aSKrzysztof Karas 		SPDK_ERRLOG("Failed to parse PSK interchange!\n");
3796572a0c8aSKrzysztof Karas 		goto end;
3797572a0c8aSKrzysztof Karas 	}
3798572a0c8aSKrzysztof Karas 
3799572a0c8aSKrzysztof Karas 	/* The Base64 string encodes the configured PSK (32 or 48 bytes binary).
3800572a0c8aSKrzysztof Karas 	 * This check also ensures that psk_configured_size is smaller than
3801572a0c8aSKrzysztof Karas 	 * psk_retained buffer size. */
3802572a0c8aSKrzysztof Karas 	if (psk_configured_size == SHA256_DIGEST_LENGTH) {
3803572a0c8aSKrzysztof Karas 		tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
3804572a0c8aSKrzysztof Karas 	} else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
3805572a0c8aSKrzysztof Karas 		tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
3806572a0c8aSKrzysztof Karas 	} else {
3807572a0c8aSKrzysztof Karas 		SPDK_ERRLOG("Unrecognized cipher suite!\n");
3808572a0c8aSKrzysztof Karas 		rc = -EINVAL;
3809572a0c8aSKrzysztof Karas 		goto end;
3810572a0c8aSKrzysztof Karas 	}
3811572a0c8aSKrzysztof Karas 
3812663243cbSKrzysztof Karas 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
3813663243cbSKrzysztof Karas 	/* Generate PSK identity. */
38149e0eacb3SKonrad Sztyber 	rc = nvme_tcp_generate_psk_identity(entry->pskid, sizeof(entry->pskid), hostnqn,
3815572a0c8aSKrzysztof Karas 					    subsystem->subnqn, tls_cipher_suite);
38163e98fd06SKrzysztof Karas 	if (rc) {
3817663243cbSKrzysztof Karas 		rc = -EINVAL;
3818663243cbSKrzysztof Karas 		goto end;
3819663243cbSKrzysztof Karas 	}
3820663243cbSKrzysztof Karas 	/* Check if PSK identity entry already exists. */
3821cac5ed95SKonrad Sztyber 	TAILQ_FOREACH(tmp, &ttransport->psks, link) {
38229e0eacb3SKonrad Sztyber 		if (strncmp(tmp->pskid, entry->pskid, NVMF_PSK_IDENTITY_LEN) == 0) {
38239e0eacb3SKonrad Sztyber 			SPDK_ERRLOG("Given PSK identity: %s entry already exists!\n", entry->pskid);
3824663243cbSKrzysztof Karas 			rc = -EEXIST;
3825663243cbSKrzysztof Karas 			goto end;
3826663243cbSKrzysztof Karas 		}
3827663243cbSKrzysztof Karas 	}
38283e98fd06SKrzysztof Karas 
3829663243cbSKrzysztof Karas 	if (snprintf(entry->hostnqn, sizeof(entry->hostnqn), "%s", hostnqn) < 0) {
3830663243cbSKrzysztof Karas 		SPDK_ERRLOG("Could not write hostnqn string!\n");
3831663243cbSKrzysztof Karas 		rc = -EINVAL;
3832663243cbSKrzysztof Karas 		goto end;
3833663243cbSKrzysztof Karas 	}
3834663243cbSKrzysztof Karas 	if (snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn) < 0) {
3835663243cbSKrzysztof Karas 		SPDK_ERRLOG("Could not write subnqn string!\n");
3836663243cbSKrzysztof Karas 		rc = -EINVAL;
3837663243cbSKrzysztof Karas 		goto end;
3838663243cbSKrzysztof Karas 	}
38399e0eacb3SKonrad Sztyber 
3840572a0c8aSKrzysztof Karas 	entry->tls_cipher_suite = tls_cipher_suite;
3841572a0c8aSKrzysztof Karas 
3842572a0c8aSKrzysztof Karas 	/* No hash indicates that Configured PSK must be used as Retained PSK. */
3843572a0c8aSKrzysztof Karas 	if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
3844572a0c8aSKrzysztof Karas 		/* Psk configured is either 32 or 48 bytes long. */
3845572a0c8aSKrzysztof Karas 		memcpy(entry->psk, psk_configured, psk_configured_size);
3846572a0c8aSKrzysztof Karas 		entry->psk_size = psk_configured_size;
3847572a0c8aSKrzysztof Karas 	} else {
38487a50a6bcSKrzysztof Karas 		/* Derive retained PSK. */
3849572a0c8aSKrzysztof Karas 		rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, hostnqn, entry->psk,
3850572a0c8aSKrzysztof Karas 						  SPDK_TLS_PSK_MAX_LEN, psk_retained_hash);
38517a50a6bcSKrzysztof Karas 		if (rc < 0) {
38527a50a6bcSKrzysztof Karas 			SPDK_ERRLOG("Unable to derive retained PSK!\n");
38537a50a6bcSKrzysztof Karas 			goto end;
38547a50a6bcSKrzysztof Karas 		}
38557a50a6bcSKrzysztof Karas 		entry->psk_size = rc;
3856572a0c8aSKrzysztof Karas 	}
3857663243cbSKrzysztof Karas 
3858663243cbSKrzysztof Karas 	TAILQ_INSERT_TAIL(&ttransport->psks, entry, link);
38597a50a6bcSKrzysztof Karas 	rc = 0;
3860663243cbSKrzysztof Karas 
3861663243cbSKrzysztof Karas end:
3862572a0c8aSKrzysztof Karas 	spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
386303b6183aSKrzysztof Karas 	spdk_memset_s(psk_interchange, sizeof(psk_interchange), 0, sizeof(psk_interchange));
386403b6183aSKrzysztof Karas 
3865663243cbSKrzysztof Karas 	free(opts.psk);
3866cac5ed95SKonrad Sztyber 	if (rc != 0) {
386743939e67SKonrad Sztyber 		nvmf_tcp_free_psk_entry(entry);
3868cac5ed95SKonrad Sztyber 	}
3869663243cbSKrzysztof Karas 
3870663243cbSKrzysztof Karas 	return rc;
3871663243cbSKrzysztof Karas }
3872663243cbSKrzysztof Karas 
3873663243cbSKrzysztof Karas static void
3874663243cbSKrzysztof Karas nvmf_tcp_subsystem_remove_host(struct spdk_nvmf_transport *transport,
3875663243cbSKrzysztof Karas 			       const struct spdk_nvmf_subsystem *subsystem,
3876663243cbSKrzysztof Karas 			       const char *hostnqn)
3877663243cbSKrzysztof Karas {
3878663243cbSKrzysztof Karas 	struct spdk_nvmf_tcp_transport *ttransport;
3879663243cbSKrzysztof Karas 	struct tcp_psk_entry *entry, *tmp;
3880663243cbSKrzysztof Karas 
3881663243cbSKrzysztof Karas 	assert(transport != NULL);
3882663243cbSKrzysztof Karas 	assert(subsystem != NULL);
3883663243cbSKrzysztof Karas 
3884663243cbSKrzysztof Karas 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
3885663243cbSKrzysztof Karas 	TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) {
3886663243cbSKrzysztof Karas 		if ((strncmp(entry->hostnqn, hostnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0 &&
3887663243cbSKrzysztof Karas 		    (strncmp(entry->subnqn, subsystem->subnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0) {
3888663243cbSKrzysztof Karas 			TAILQ_REMOVE(&ttransport->psks, entry, link);
388943939e67SKonrad Sztyber 			nvmf_tcp_free_psk_entry(entry);
3890663243cbSKrzysztof Karas 			break;
3891663243cbSKrzysztof Karas 		}
3892663243cbSKrzysztof Karas 	}
3893663243cbSKrzysztof Karas }
3894663243cbSKrzysztof Karas 
3895e956be96SZiye Yang static void
3896b4ba2c40SKrzysztof Karas nvmf_tcp_subsystem_dump_host(struct spdk_nvmf_transport *transport,
3897b4ba2c40SKrzysztof Karas 			     const struct spdk_nvmf_subsystem *subsystem, const char *hostnqn,
3898b4ba2c40SKrzysztof Karas 			     struct spdk_json_write_ctx *w)
3899b4ba2c40SKrzysztof Karas {
3900b4ba2c40SKrzysztof Karas 	struct spdk_nvmf_tcp_transport *ttransport;
3901b4ba2c40SKrzysztof Karas 	struct tcp_psk_entry *entry;
3902b4ba2c40SKrzysztof Karas 
3903b4ba2c40SKrzysztof Karas 	assert(transport != NULL);
3904b4ba2c40SKrzysztof Karas 	assert(subsystem != NULL);
3905b4ba2c40SKrzysztof Karas 
3906b4ba2c40SKrzysztof Karas 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
3907b4ba2c40SKrzysztof Karas 	TAILQ_FOREACH(entry, &ttransport->psks, link) {
3908b4ba2c40SKrzysztof Karas 		if ((strncmp(entry->hostnqn, hostnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0 &&
3909b4ba2c40SKrzysztof Karas 		    (strncmp(entry->subnqn, subsystem->subnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0) {
391011cc2256SKonrad Sztyber 			spdk_json_write_named_string(w, "psk",  spdk_key_get_name(entry->key));
3911b4ba2c40SKrzysztof Karas 			break;
3912b4ba2c40SKrzysztof Karas 		}
3913b4ba2c40SKrzysztof Karas 	}
3914b4ba2c40SKrzysztof Karas }
3915b4ba2c40SKrzysztof Karas 
3916b4ba2c40SKrzysztof Karas static void
39174de405abSSeth Howell nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
3918e956be96SZiye Yang {
39195eb3239cSMengjinWu 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
3920e956be96SZiye Yang 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
3921e956be96SZiye Yang 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
3922e956be96SZiye Yang 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
3923e956be96SZiye Yang 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
39245eb3239cSMengjinWu 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
392558f16244SZiye Yang 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
3926e816c8fdSSeth Howell 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
3927aa322721SShuhei Matsumoto 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
392826e0ef9aSShuhei Matsumoto 	opts->abort_timeout_sec =	SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC;
3929f766d1e4SDarek Stojaczyk 	opts->transport_specific =      NULL;
3930e956be96SZiye Yang }
3931e956be96SZiye Yang 
3932e956be96SZiye Yang const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
39335b3e6cd1SSeth Howell 	.name = "TCP",
3934e956be96SZiye Yang 	.type = SPDK_NVME_TRANSPORT_TCP,
39354de405abSSeth Howell 	.opts_init = nvmf_tcp_opts_init,
39364de405abSSeth Howell 	.create = nvmf_tcp_create,
3937f766d1e4SDarek Stojaczyk 	.dump_opts = nvmf_tcp_dump_opts,
39384de405abSSeth Howell 	.destroy = nvmf_tcp_destroy,
3939e956be96SZiye Yang 
39404de405abSSeth Howell 	.listen = nvmf_tcp_listen,
39414de405abSSeth Howell 	.stop_listen = nvmf_tcp_stop_listen,
3942e956be96SZiye Yang 
39434de405abSSeth Howell 	.listener_discover = nvmf_tcp_discover,
3944e956be96SZiye Yang 
39454de405abSSeth Howell 	.poll_group_create = nvmf_tcp_poll_group_create,
39464de405abSSeth Howell 	.get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group,
39474de405abSSeth Howell 	.poll_group_destroy = nvmf_tcp_poll_group_destroy,
39484de405abSSeth Howell 	.poll_group_add = nvmf_tcp_poll_group_add,
39494de405abSSeth Howell 	.poll_group_remove = nvmf_tcp_poll_group_remove,
39504de405abSSeth Howell 	.poll_group_poll = nvmf_tcp_poll_group_poll,
3951e956be96SZiye Yang 
39524de405abSSeth Howell 	.req_free = nvmf_tcp_req_free,
39534de405abSSeth Howell 	.req_complete = nvmf_tcp_req_complete,
3954c7d22538SKrzysztof Goreczny 	.req_get_buffers_done = nvmf_tcp_req_get_buffers_done,
3955e956be96SZiye Yang 
39564de405abSSeth Howell 	.qpair_fini = nvmf_tcp_close_qpair,
39574de405abSSeth Howell 	.qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid,
39584de405abSSeth Howell 	.qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid,
39594de405abSSeth Howell 	.qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid,
3960604b4503SShuhei Matsumoto 	.qpair_abort_request = nvmf_tcp_qpair_abort_request,
3961663243cbSKrzysztof Karas 	.subsystem_add_host = nvmf_tcp_subsystem_add_host,
3962663243cbSKrzysztof Karas 	.subsystem_remove_host = nvmf_tcp_subsystem_remove_host,
3963b4ba2c40SKrzysztof Karas 	.subsystem_dump_host = nvmf_tcp_subsystem_dump_host,
3964e956be96SZiye Yang };
3965e956be96SZiye Yang 
3966f038354eSSeth Howell SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);
39672172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(nvmf_tcp)
3968