xref: /spdk/lib/nvme/nvme_tcp.c (revision 1efa1b16d579b0c09bcbf26a84140cbbcf88d9df)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2018 Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * NVMe/TCP transport
 */

#include "nvme_internal.h"

#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/assert.h"
#include "spdk/trace.h"
#include "spdk/util.h"
#include "spdk/nvmf.h"
#include "spdk/dma.h"

#include "spdk_internal/nvme_tcp.h"
#include "spdk_internal/trace_defs.h"

#define NVME_TCP_RW_BUFFER_SIZE 131072

/* For async connect workloads, allow more time since we are more likely
 * to be processing lots of ICREQs at once.
 */
#define ICREQ_TIMEOUT_SYNC 2 /* in seconds */
#define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */

#define NVME_TCP_HPDA_DEFAULT			0
#define NVME_TCP_MAX_R2T_DEFAULT		1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE		4096

/*
 * Maximum value of transport_ack_timeout used by TCP controller
 */
#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT	31

enum nvme_tcp_qpair_state {
	NVME_TCP_QPAIR_STATE_INVALID = 0,
	NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
	NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND = 2,
	NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL = 3,
	NVME_TCP_QPAIR_STATE_AUTHENTICATING = 4,
	NVME_TCP_QPAIR_STATE_RUNNING = 5,
	NVME_TCP_QPAIR_STATE_EXITING = 6,
	NVME_TCP_QPAIR_STATE_EXITED = 7,
};

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	struct spdk_nvme_ctrlr			ctrlr;
	char					psk_identity[NVMF_PSK_IDENTITY_LEN];
	uint8_t					psk[SPDK_TLS_PSK_MAX_LEN];
	int					psk_size;
	char					*tls_cipher_suite;
};

struct nvme_tcp_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_sock_group *sock_group;
	uint32_t completions_per_qpair;
	int64_t num_completions;

	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
	struct spdk_nvme_tcp_stat stats;
};

/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	struct spdk_nvme_qpair			qpair;
	struct spdk_sock			*sock;

	TAILQ_HEAD(, nvme_tcp_req)		free_reqs;
	TAILQ_HEAD(, nvme_tcp_req)		outstanding_reqs;

	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
	struct nvme_tcp_pdu			*recv_pdu;
	struct nvme_tcp_pdu			*send_pdu; /* only for error pdu and init pdu */
	struct nvme_tcp_pdu			*send_pdus; /* Used by tcp_reqs */
	enum nvme_tcp_pdu_recv_state		recv_state;
	struct nvme_tcp_req			*tcp_reqs;
	struct spdk_nvme_tcp_stat		*stats;

	uint16_t				num_entries;
	uint16_t				async_complete;

	struct {
		uint16_t host_hdgst_enable: 1;
		uint16_t host_ddgst_enable: 1;
		uint16_t icreq_send_ack: 1;
		uint16_t in_connect_poll: 1;
		uint16_t reserved: 12;
	} flags;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t				maxh2cdata;

	uint32_t				maxr2t;

	/* 0-based value, which is used to guide the padding */
	uint8_t					cpda;

	enum nvme_tcp_qpair_state		state;

	TAILQ_ENTRY(nvme_tcp_qpair)		link;
	bool					needs_poll;

	uint64_t				icreq_timeout_tsc;

	bool					shared_stats;
};

enum nvme_tcp_req_state {
	NVME_TCP_REQ_FREE,
	NVME_TCP_REQ_ACTIVE,
	NVME_TCP_REQ_ACTIVE_R2T,
};

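/*
 * Per-command context. The 'ordering' union below records the two events that
 * must both occur before a request can be completed (see
 * nvme_tcp_req_complete_safe()): the kernel acking our send and the target
 * returning a response (or the last C2H data). The rsvd1 padding plus the
 * SPDK_STATIC_ASSERT that follows keep the structure a multiple of the cache
 * line size, presumably to avoid false sharing between adjacent requests.
 */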
struct nvme_tcp_req {
	struct nvme_request			*req;
	enum nvme_tcp_req_state			state;
	uint16_t				cid;
	uint16_t				ttag;
	uint32_t				datao;
	uint32_t				expected_datao;
	uint32_t				r2tl_remain;
	uint32_t				active_r2ts;
	/* Used to hold the value received from a subsequent R2T while we are still
	 * waiting for the H2C transfer to complete */
	uint16_t				ttag_r2t_next;
	bool					in_capsule_data;
	/* Used to track whether the req can be safely freed */
	union {
		uint8_t raw;
		struct {
			/* The last send operation completed - kernel released send buffer */
			uint8_t				send_ack : 1;
			/* Data transfer completed - target sent resp or last data bit */
			uint8_t				data_recv : 1;
			/* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification
			 * from kernel) to send H2C */
			uint8_t				h2c_send_waiting_ack : 1;
			/* tcp_req received a subsequent r2t while it is still waiting for send_ack.
			 * Rare case that occurs when dealing with a target that can send several R2T requests.
			 * The SPDK TCP target sends 1 R2T for the whole data buffer */
			uint8_t				r2t_waiting_h2c_complete : 1;
			/* Accel operation is in progress */
			uint8_t				in_progress_accel : 1;
			uint8_t				domain_in_use: 1;
			uint8_t				reserved : 2;
		} bits;
	} ordering;
	struct nvme_tcp_pdu			*pdu;
	struct iovec				iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t				iovcnt;
	/* Used to hold the value received from a subsequent R2T while we are still
	 * waiting for the H2C ack */
	uint32_t				r2tl_remain_next;
	struct nvme_tcp_qpair			*tqpair;
	TAILQ_ENTRY(nvme_tcp_req)		link;
	struct spdk_nvme_cpl			rsp;
	uint8_t					rsvd1[32];
};
SPDK_STATIC_ASSERT(sizeof(struct nvme_tcp_req) % SPDK_CACHE_LINE_SIZE == 0, "unaligned size");

static struct spdk_nvme_tcp_stat g_dummy_stats = {};

static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group
		*tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu);
static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair,
				  struct spdk_nvme_cpl *rsp, bool print_on_error);

static inline struct nvme_tcp_qpair *
nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
}

static inline struct nvme_tcp_poll_group *
nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
{
	return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
}

static inline struct nvme_tcp_ctrlr *
nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
}

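/*
 * Pop a request from the qpair's free list and reset its per-command state.
 * Returns NULL when all num_entries requests are outstanding; the caller
 * turns that into -EAGAIN (see nvme_tcp_qpair_submit_request()).
 */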
static struct nvme_tcp_req *
nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_req *tcp_req;

	tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
	if (!tcp_req) {
		return NULL;
	}

	assert(tcp_req->state == NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_ACTIVE;
	TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
	tcp_req->datao = 0;
	tcp_req->expected_datao = 0;
	tcp_req->req = NULL;
	tcp_req->in_capsule_data = false;
	tcp_req->r2tl_remain = 0;
	tcp_req->r2tl_remain_next = 0;
	tcp_req->active_r2ts = 0;
	tcp_req->iovcnt = 0;
	tcp_req->ordering.raw = 0;
	memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu));
	memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl));

	return tcp_req;
}

static void
nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	assert(tcp_req->state != NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_FREE;
	TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
}

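/*
 * Thin wrappers around the poll group's accel_fn_table. They keep the
 * tgroup->group.group indirection in one place and ensure that the
 * in_progress_accel flag is raised in exactly one spot whenever a sequence
 * is handed off to the accel framework for execution.
 */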
static inline void
nvme_tcp_accel_finish_sequence(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq,
			       void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_poll_group *pg = tgroup->group.group;

	treq->ordering.bits.in_progress_accel = 1;
	pg->accel_fn_table.finish_sequence(seq, cb_fn, cb_arg);
}

static inline void
nvme_tcp_accel_reverse_sequence(struct nvme_tcp_poll_group *tgroup, void *seq)
{
	struct spdk_nvme_poll_group *pg = tgroup->group.group;

	pg->accel_fn_table.reverse_sequence(seq);
}

static inline int
nvme_tcp_accel_append_crc32c(struct nvme_tcp_poll_group *tgroup, void **seq, uint32_t *dst,
			     struct iovec *iovs, uint32_t iovcnt, uint32_t seed,
			     spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_poll_group *pg = tgroup->group.group;

	return pg->accel_fn_table.append_crc32c(pg->ctx, seq, dst, iovs, iovcnt, NULL, NULL,
						seed, cb_fn, cb_arg);
}

static void
nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair)
{
	free(tqpair->tcp_reqs);
	tqpair->tcp_reqs = NULL;

	spdk_free(tqpair->send_pdus);
	tqpair->send_pdus = NULL;
}

static int
nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
{
	uint16_t i;
	struct nvme_tcp_req *tcp_req;

	tqpair->tcp_reqs = aligned_alloc(SPDK_CACHE_LINE_SIZE,
					 tqpair->num_entries * sizeof(*tcp_req));
	if (tqpair->tcp_reqs == NULL) {
		SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair);
		goto fail;
	}

	/* Allocate 2 additional members for the send_pdu and recv_pdu owned by the tqpair */
	tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu),
					 0x1000, NULL,
					 SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);

	if (tqpair->send_pdus == NULL) {
		SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair);
		goto fail;
	}

	memset(tqpair->tcp_reqs, 0, tqpair->num_entries * sizeof(*tcp_req));
	TAILQ_INIT(&tqpair->send_queue);
	TAILQ_INIT(&tqpair->free_reqs);
	TAILQ_INIT(&tqpair->outstanding_reqs);
	tqpair->qpair.queue_depth = 0;
	for (i = 0; i < tqpair->num_entries; i++) {
		tcp_req = &tqpair->tcp_reqs[i];
		tcp_req->cid = i;
		tcp_req->tqpair = tqpair;
		tcp_req->pdu = &tqpair->send_pdus[i];
		TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
	}

	tqpair->send_pdu = &tqpair->send_pdus[i];
	tqpair->recv_pdu = &tqpair->send_pdus[i + 1];

	return 0;
fail:
	nvme_tcp_free_reqs(tqpair);
	return -ENOMEM;
}

static inline void
nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
			      enum nvme_tcp_pdu_recv_state state)
{
	if (tqpair->recv_state == state) {
		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state (%d) to be set\n",
			    tqpair, state);
		return;
	}

	if (state == NVME_TCP_PDU_RECV_STATE_ERROR) {
		assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
	}

	tqpair->recv_state = state;
}

static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static void
nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_pdu *pdu;
	int rc;
	struct nvme_tcp_poll_group *group;

	if (tqpair->needs_poll) {
		group = nvme_tcp_poll_group(qpair->poll_group);
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	rc = spdk_sock_close(&tqpair->sock);

	if (tqpair->sock != NULL) {
		SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc);
		/* Set it to NULL manually */
		tqpair->sock = NULL;
	}

	/* clear the send_queue */
	while (!TAILQ_EMPTY(&tqpair->send_queue)) {
		pdu = TAILQ_FIRST(&tqpair->send_queue);
		/* Remove the pdu from the send_queue so that it cannot be wrongly
		 * sent out on the next connection attempt
		 */
		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
	}

	nvme_tcp_qpair_abort_reqs(qpair, qpair->abort_dnr);

	/* If the qpair is marked as asynchronous, let it go through the process_completions() to
	 * let any outstanding requests (e.g. those with outstanding accel operations) complete.
	 * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be
	 * empty.
	 */
	if (qpair->async) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
	} else {
		assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
		nvme_transport_ctrlr_disconnect_qpair_done(qpair);
	}
}

static int
nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	assert(qpair != NULL);
	nvme_tcp_qpair_abort_reqs(qpair, qpair->abort_dnr);
	assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));

	nvme_qpair_deinit(qpair);
	nvme_tcp_free_reqs(tqpair);
	if (!tqpair->shared_stats) {
		free(tqpair->stats);
	}
	free(tqpair);

	return 0;
}

static int
nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	return 0;
}

static int
nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);

	if (ctrlr->adminq) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	free(tctrlr);

	return 0;
}

/* If there are queued requests, we assume they are queued because they are waiting
 * for resources to be released. Those resources are almost certainly released in
 * response to a PDU completing. However, to attempt to make forward progress
 * the qpair needs to be polled and we can't rely on another network event to make
 * that happen. Add it to a list of qpairs to poll regardless of network activity.
 *
 * Additionally, when the tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or
 * NVME_TCP_QPAIR_STATE_INITIALIZING, it also needs to be added to the needs_poll list
 * to make forward progress in case the resources are released after the icreq's or
 * CONNECT's response is processed. */
static void
nvme_tcp_cond_schedule_qpair_polling(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_poll_group *pgroup;

	if (tqpair->needs_poll || !tqpair->qpair.poll_group) {
		return;
	}

	if (STAILQ_EMPTY(&tqpair->qpair.queued_req) &&
	    spdk_likely(tqpair->state != NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL &&
			tqpair->state != NVME_TCP_QPAIR_STATE_INITIALIZING)) {
		return;
	}

	pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
	TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
	tqpair->needs_poll = true;
}

static void
pdu_write_done(void *cb_arg, int err)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;

	nvme_tcp_cond_schedule_qpair_polling(tqpair);
	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);

	if (err != 0) {
		nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static void
pdu_write_fail(struct nvme_tcp_pdu *pdu, int status)
{
	struct nvme_tcp_qpair *tqpair = pdu->qpair;

	/* This function is similar to pdu_write_done(), but it should be called before a PDU is
	 * sent over the socket */
	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
	pdu_write_done(pdu, status);
}

static void
pdu_seq_fail(struct nvme_tcp_pdu *pdu, int status)
{
	struct nvme_tcp_req *treq = pdu->req;

	SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
	nvme_tcp_cond_schedule_qpair_polling(pdu->qpair);
	treq->rsp.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	nvme_tcp_req_complete(treq, treq->tqpair, &treq->rsp, true);
}

4979ccef490SJacek Kalwas static void
498f1f4f7d3SZiye Yang _tcp_write_pdu(struct nvme_tcp_pdu *pdu)
499f1f4f7d3SZiye Yang {
500f1f4f7d3SZiye Yang 	uint32_t mapped_length = 0;
501f1f4f7d3SZiye Yang 	struct nvme_tcp_qpair *tqpair = pdu->qpair;
502f1f4f7d3SZiye Yang 
503ada93334SAlexey Marchuk 	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
504f1f4f7d3SZiye Yang 			       (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable,
505f1f4f7d3SZiye Yang 			       &mapped_length);
506ada93334SAlexey Marchuk 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
507ada93334SAlexey Marchuk 	if (spdk_unlikely(mapped_length < pdu->data_len)) {
508ada93334SAlexey Marchuk 		SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len,
509ada93334SAlexey Marchuk 			    mapped_length);
510485cb2a6SKonrad Sztyber 		pdu_write_done(pdu, -EINVAL);
511ada93334SAlexey Marchuk 		return;
512ada93334SAlexey Marchuk 	}
513485cb2a6SKonrad Sztyber 	pdu->sock_req.cb_fn = pdu_write_done;
514f1f4f7d3SZiye Yang 	pdu->sock_req.cb_arg = pdu;
515ea86c035SAlexey Marchuk 	tqpair->stats->submitted_requests++;
516f1f4f7d3SZiye Yang 	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
517f1f4f7d3SZiye Yang }
518f1f4f7d3SZiye Yang 
519f1f4f7d3SZiye Yang static void
520e16aabddSKonrad Sztyber tcp_write_pdu_seq_cb(void *ctx, int status)
521e16aabddSKonrad Sztyber {
522e16aabddSKonrad Sztyber 	struct nvme_tcp_pdu *pdu = ctx;
523e16aabddSKonrad Sztyber 	struct nvme_tcp_req *treq = pdu->req;
524e16aabddSKonrad Sztyber 	struct nvme_request *req = treq->req;
525e16aabddSKonrad Sztyber 
526b5c6199eSKonrad Sztyber 	assert(treq->ordering.bits.in_progress_accel);
527b5c6199eSKonrad Sztyber 	treq->ordering.bits.in_progress_accel = 0;
528b5c6199eSKonrad Sztyber 
529e16aabddSKonrad Sztyber 	req->accel_sequence = NULL;
530e16aabddSKonrad Sztyber 	if (spdk_unlikely(status != 0)) {
5319ccef490SJacek Kalwas 		pdu_seq_fail(pdu, status);
532e16aabddSKonrad Sztyber 		return;
533e16aabddSKonrad Sztyber 	}
534e16aabddSKonrad Sztyber 
535e16aabddSKonrad Sztyber 	_tcp_write_pdu(pdu);
536e16aabddSKonrad Sztyber }
537e16aabddSKonrad Sztyber 
538e16aabddSKonrad Sztyber static void
539e16aabddSKonrad Sztyber tcp_write_pdu(struct nvme_tcp_pdu *pdu)
540e16aabddSKonrad Sztyber {
541e16aabddSKonrad Sztyber 	struct nvme_tcp_req *treq = pdu->req;
542e16aabddSKonrad Sztyber 	struct nvme_tcp_qpair *tqpair = pdu->qpair;
543e16aabddSKonrad Sztyber 	struct nvme_tcp_poll_group *tgroup;
544e16aabddSKonrad Sztyber 	struct nvme_request *req;
545e16aabddSKonrad Sztyber 
546e16aabddSKonrad Sztyber 	if (spdk_likely(treq != NULL)) {
547e16aabddSKonrad Sztyber 		req = treq->req;
548e16aabddSKonrad Sztyber 		if (req->accel_sequence != NULL &&
5499be487cfSKonrad Sztyber 		    spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER &&
5509be487cfSKonrad Sztyber 		    pdu->data_len > 0) {
551e16aabddSKonrad Sztyber 			assert(tqpair->qpair.poll_group != NULL);
552e16aabddSKonrad Sztyber 			tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
553b5c6199eSKonrad Sztyber 			nvme_tcp_accel_finish_sequence(tgroup, treq, req->accel_sequence,
554e16aabddSKonrad Sztyber 						       tcp_write_pdu_seq_cb, pdu);
555e16aabddSKonrad Sztyber 			return;
556e16aabddSKonrad Sztyber 		}
557e16aabddSKonrad Sztyber 	}
558e16aabddSKonrad Sztyber 
559e16aabddSKonrad Sztyber 	_tcp_write_pdu(pdu);
560e16aabddSKonrad Sztyber }
561e16aabddSKonrad Sztyber 
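/*
 * Data-digest offload: when the poll group's accel table provides
 * append_crc32c(), the CRC32C computation is appended to the request's accel
 * sequence rather than run inline on the CPU. The step callback below then
 * finalizes the digest word the same way the software path does (XOR with
 * SPDK_CRC32C_XOR followed by MAKE_DIGEST_WORD()).
 */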
static void
pdu_accel_seq_compute_crc32_done(void *cb_arg)
{
	struct nvme_tcp_pdu *pdu = cb_arg;

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
}

static bool
pdu_accel_compute_crc32(struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
	struct nvme_request *req = ((struct nvme_tcp_req *)pdu->req)->req;
	int rc;

	/* Only this limited case is supported for the first step */
	if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
			  pdu->dif_ctx != NULL ||
			  pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0)) {
		return false;
	}

	if (tqpair->qpair.poll_group == NULL ||
	    tgroup->group.group->accel_fn_table.append_crc32c == NULL) {
		return false;
	}

	rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
					  &pdu->data_digest_crc32,
					  pdu->data_iov, pdu->data_iovcnt, 0,
					  pdu_accel_seq_compute_crc32_done, pdu);
	if (spdk_unlikely(rc != 0)) {
		/* If accel is out of resources, fall back to non-accelerated crc32 */
		if (rc == -ENOMEM) {
			return false;
		}

		SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
		pdu_write_fail(pdu, rc);
		return true;
	}

	tcp_write_pdu(pdu);

	return true;
}

static void
pdu_compute_crc32_seq_cb(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct nvme_tcp_req *treq = pdu->req;
	struct nvme_request *req = treq->req;
	uint32_t crc32c;

	assert(treq->ordering.bits.in_progress_accel);
	treq->ordering.bits.in_progress_accel = 0;

	req->accel_sequence = NULL;
	if (spdk_unlikely(status != 0)) {
		pdu_seq_fail(pdu, status);
		return;
	}

	crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
	crc32c = crc32c ^ SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, crc32c);

	_tcp_write_pdu(pdu);
}

static void
pdu_compute_crc32(struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	struct nvme_tcp_poll_group *tgroup;
	struct nvme_request *req;
	uint32_t crc32c;

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] &&
	    tqpair->flags.host_ddgst_enable) {
		if (pdu_accel_compute_crc32(pdu)) {
			return;
		}

		req = ((struct nvme_tcp_req *)pdu->req)->req;
		if (req->accel_sequence != NULL) {
			tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
			nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence,
						       pdu_compute_crc32_seq_cb, pdu);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		crc32c = crc32c ^ SPDK_CRC32C_XOR;
		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
	}

	tcp_write_pdu(pdu);
}

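/*
 * Common entry point for sending a PDU. The header digest (if negotiated) is
 * computed synchronously here; the data digest is handled afterwards by
 * pdu_compute_crc32(), possibly via the accel path above, before the PDU is
 * finally queued on the socket with spdk_sock_writev_async().
 */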
static int
nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;
	pdu->qpair = tqpair;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)&pdu->hdr.raw[hlen], crc32c);
	}

	pdu_compute_crc32(pdu);

	return 0;
}

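/*
 * Translate a payload address out of its memory domain into the system
 * domain before building iovecs. Requests without a memory domain skip this
 * (domain_in_use stays 0). An illustrative sketch of how a caller might
 * attach a domain, assuming a hypothetical 'domain' handle and the extended
 * I/O options struct:
 *
 *	struct spdk_nvme_ns_cmd_ext_io_opts opts = {
 *		.size = sizeof(opts),
 *		.memory_domain = domain,
 *		.memory_domain_ctx = domain_ctx,
 *	};
 *	spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count, cb_fn, cb_arg,
 *				    reset_sgl_fn, next_sge_fn, &opts);
 */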
static int
nvme_tcp_try_memory_translation(struct nvme_tcp_req *tcp_req, void **addr, uint32_t length)
{
	struct nvme_request *req = tcp_req->req;
	struct spdk_memory_domain_translation_result translation = {
		.iov_count = 0,
		.size = sizeof(translation)
	};
	int rc;

	if (!tcp_req->ordering.bits.domain_in_use) {
		return 0;
	}

	rc = spdk_memory_domain_translate_data(req->payload.opts->memory_domain,
					       req->payload.opts->memory_domain_ctx, spdk_memory_domain_get_system_domain(), NULL, *addr, length,
					       &translation);
	if (spdk_unlikely(rc || translation.iov_count != 1)) {
		SPDK_ERRLOG("DMA memory translation failed, rc %d, iov_count %u\n", rc, translation.iov_count);
		return -EFAULT;
	}

	assert(length == translation.iov.iov_len);
	*addr = translation.iov.iov_base;
	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	struct nvme_request *req = tcp_req->req;

	/* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
	 * so just double cast it to make it go away */
	void *addr = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
	size_t length = req->payload_size;
	int rc;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
	rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	tcp_req->iov[0].iov_base = addr;
	tcp_req->iov[0].iov_len = length;
	tcp_req->iovcnt = 1;
	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	int rc;
	uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
	struct nvme_request *req = tcp_req->req;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
	remaining_size = req->payload_size;

	do {
		void *addr;

		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &addr, &length);
		if (rc) {
			return -1;
		}

		rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
		if (spdk_unlikely(rc)) {
			return rc;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_base = addr;
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
	} while (remaining_size > 0 && iovcnt < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
	if (remaining_size > 0) {
		SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
			    tcp_req, iovcnt, remaining_size);
		return -1;
	}

	tcp_req->iovcnt = iovcnt;

	return 0;
}

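/*
 * Fill in the NVMe command and decide how the payload travels: host-to-
 * controller transfers whose size fits within the in-capsule limit
 * (ioccsz_bytes, capped at SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE for
 * fabrics/admin commands) are sent inline in the command capsule; larger
 * writes are sent later in H2C PDUs driven by the target's R2Ts.
 */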
static int
nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
		  struct nvme_tcp_req *tcp_req)
{
	struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
	int rc = 0;
	enum spdk_nvme_data_transfer xfer;
	uint32_t max_in_capsule_data_size;

	tcp_req->req = req;
	tcp_req->ordering.bits.domain_in_use = (req->payload.opts && req->payload.opts->memory_domain);

	req->cmd.cid = tcp_req->cid;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
	req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;

	if (spdk_unlikely(req->cmd.opc == SPDK_NVME_OPC_FABRIC)) {
		struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;

		xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
	} else {
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
	}

	/* For c2h, delay filling in the iov until the data arrives.
	 * For h2c, some delay is also possible if the data doesn't fit into the cmd capsule (not implemented). */
	if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
		if (xfer != SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
			rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
		}
	} else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
		if (xfer != SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
			rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
		}
	} else {
		rc = -1;
	}

	if (rc) {
		return rc;
	}

	if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		max_in_capsule_data_size = ctrlr->ioccsz_bytes;
		if (spdk_unlikely((req->cmd.opc == SPDK_NVME_OPC_FABRIC) ||
				  nvme_qpair_is_admin_queue(&tqpair->qpair))) {
			max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
		}

		if (req->payload_size <= max_in_capsule_data_size) {
			req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
			req->cmd.dptr.sgl1.address = 0;
			tcp_req->in_capsule_data = true;
		}
	}

	return 0;
}

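/*
 * A request may only be completed once both halves of the exchange are done:
 * the kernel has acked our send (send_ack) and the target has sent the
 * response or final C2H data (data_recv), with no accel operation still in
 * flight. Either event may arrive first, so both paths funnel through this
 * helper.
 */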
static inline bool
nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req)
{
	if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv &&
	      !tcp_req->ordering.bits.in_progress_accel)) {
		return false;
	}

	assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
	assert(tcp_req->tqpair != NULL);
	assert(tcp_req->req != NULL);

	nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true);
	return true;
}

static void
nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid,
		      tcp_req->tqpair->qpair.id);
	tcp_req->ordering.bits.send_ack = 1;
	/* Handle the r2t case */
	if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) {
		SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		if (tcp_req->in_capsule_data && tcp_req->ordering.bits.domain_in_use) {
			spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
							   tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
		}

		nvme_tcp_req_complete_safe(tcp_req);
	}
}

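/*
 * Build and queue the command capsule PDU: common header, optional header
 * digest, padding dictated by the controller's CPDA (in-capsule data must
 * start on a (cpda + 1) * 4 byte boundary), then the in-capsule payload and
 * optional data digest.
 */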
static int
nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
				struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_pdu *pdu;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t plen = 0, alignment;
	uint8_t pdo;

	SPDK_DEBUGLOG(nvme, "enter\n");
	pdu = tcp_req->pdu;
	pdu->req = tcp_req;

	capsule_cmd = &pdu->hdr.capsule_cmd;
	capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
	capsule_cmd->ccsqe = tcp_req->req->cmd;

	SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);

	if (tqpair->flags.host_hdgst_enable) {
		SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n",
			      tcp_req);
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) {
		goto end;
	}

	pdo = plen;
	pdu->padding_len = 0;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			pdu->padding_len = alignment - plen;
			pdo = alignment;
			plen = alignment;
		}
	}

	capsule_cmd->common.pdo = pdo;
	plen += tcp_req->req->payload_size;
	if (tqpair->flags.host_ddgst_enable) {
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	tcp_req->datao = 0;
	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  0, tcp_req->req->payload_size);
end:
	capsule_cmd->common.plen = plen;
	return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req);
}

static int
nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_req *tcp_req;

	tqpair = nvme_tcp_qpair(qpair);
	assert(tqpair != NULL);
	assert(req != NULL);

	tcp_req = nvme_tcp_req_get(tqpair);
	if (!tcp_req) {
		tqpair->stats->queued_requests++;
		/* Inform the upper layer to try again later. */
		return -EAGAIN;
	}

	if (spdk_unlikely(nvme_tcp_req_init(tqpair, req, tcp_req))) {
		SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
		nvme_tcp_req_put(tqpair, tcp_req);
		return -1;
	}

	tqpair->qpair.queue_depth++;
	spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)tcp_req->pdu, req->cb_arg,
			  (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc,
			  req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12, tqpair->qpair.queue_depth);
	TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);
	return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
}

static int
nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	return 0;
}

static void
nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req,
		      struct nvme_tcp_qpair *tqpair,
		      struct spdk_nvme_cpl *rsp,
		      bool print_on_error)
{
	struct spdk_nvme_cpl	cpl;
	struct spdk_nvme_qpair	*qpair;
	struct nvme_request	*req;
	bool			print_error;

	assert(tcp_req->req != NULL);
	req = tcp_req->req;
	qpair = req->qpair;

	SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);

	if (!tcp_req->tqpair->qpair.in_completion_context) {
		tcp_req->tqpair->async_complete++;
	}

	/* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */
	memcpy(&cpl, rsp, sizeof(cpl));

	if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
		print_error = print_on_error && !qpair->ctrlr->opts.disable_error_logging;

		if (print_error) {
			spdk_nvme_qpair_print_command(qpair, &req->cmd);
		}

		if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
			spdk_nvme_qpair_print_completion(qpair, rsp);
		}
	}

	tqpair->qpair.queue_depth--;
	spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)tcp_req->pdu, req->cb_arg,
			  (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw, tqpair->qpair.queue_depth);
	TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
	nvme_tcp_req_put(tqpair, tcp_req);
	nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
}

static void
nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl = {};
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.sqid = qpair->id;
	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	cpl.status.dnr = dnr;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		/* We cannot abort requests with accel operations in progress */
		if (tcp_req->ordering.bits.in_progress_accel) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true);
	}
}

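/*
 * Fatal-error path: on receiving a malformed PDU the host sends an H2C
 * termination request that carries the offending header (truncated to
 * SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) and then tears the connection
 * down once the PDU has been written out.
 */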
1062e956be96SZiye Yang static void
1063e956be96SZiye Yang nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg)
1064e956be96SZiye Yang {
1065e956be96SZiye Yang 	struct nvme_tcp_qpair *tqpair = cb_arg;
1066e956be96SZiye Yang 
1067e956be96SZiye Yang 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1068e956be96SZiye Yang }
1069e956be96SZiye Yang 
1070e956be96SZiye Yang static void
1071e956be96SZiye Yang nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1072e956be96SZiye Yang 				 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1073e956be96SZiye Yang {
1074e956be96SZiye Yang 	struct nvme_tcp_pdu *rsp_pdu;
1075e956be96SZiye Yang 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req;
1076e956be96SZiye Yang 	uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req);
107771cd1ea7SZiye Yang 	uint8_t copy_len;
1078e956be96SZiye Yang 
10797bac9b06SZiye Yang 	rsp_pdu = tqpair->send_pdu;
1080e956be96SZiye Yang 	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
1081ea65bf61SBen Walker 	h2c_term_req = &rsp_pdu->hdr.term_req;
1082e956be96SZiye Yang 	h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
1083e956be96SZiye Yang 	h2c_term_req->common.hlen = h2c_term_req_hdr_len;
1084e956be96SZiye Yang 
1085e956be96SZiye Yang 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1086e956be96SZiye Yang 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1087e956be96SZiye Yang 		DSET32(&h2c_term_req->fei, error_offset);
1088e956be96SZiye Yang 	}
1089e956be96SZiye Yang 
1090ea65bf61SBen Walker 	copy_len = pdu->hdr.common.hlen;
109171cd1ea7SZiye Yang 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
109271cd1ea7SZiye Yang 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
109371cd1ea7SZiye Yang 	}
109471cd1ea7SZiye Yang 
109571cd1ea7SZiye Yang 	/* Copy the error info into the buffer */
1096ea65bf61SBen Walker 	memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len);
1097ea65bf61SBen Walker 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len);
109871cd1ea7SZiye Yang 
110071cd1ea7SZiye Yang 	/* plen also accounts for the copied header of the offending PDU */
110071cd1ea7SZiye Yang 	h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len;
110178df9be4SBen Walker 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
11022f579469SChangpeng Liu 	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair);
1103e956be96SZiye Yang }
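
/*
 * Worked example of the term req layout built above (sizes assumed from the
 * NVMe/TCP spec: a 24-byte term req header and a 152-byte cap on error data):
 * rejecting a PDU whose common header reports hlen = 24 gives copy_len = 24,
 * so the wire PDU has plen = 24 + 24 = 48 bytes: the H2C term req header
 * followed by the first 24 header bytes of the PDU being rejected.
 */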
1104e956be96SZiye Yang 
1105b49fa72bSJim Harris static bool
1106b49fa72bSJim Harris nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair)
1107b49fa72bSJim Harris {
1108b49fa72bSJim Harris 	switch (tqpair->state) {
1109b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
1110b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
1111ffd098cfSKonrad Sztyber 	case NVME_TCP_QPAIR_STATE_AUTHENTICATING:
1112b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_RUNNING:
1113b49fa72bSJim Harris 		return true;
1114b49fa72bSJim Harris 	default:
1115b49fa72bSJim Harris 		return false;
1116b49fa72bSJim Harris 	}
1117b49fa72bSJim Harris }
1118b49fa72bSJim Harris 
1119e956be96SZiye Yang static void
1120e956be96SZiye Yang nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
1121e956be96SZiye Yang {
1122e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
1123e956be96SZiye Yang 	uint32_t error_offset = 0;
1124e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1125e956be96SZiye Yang 	uint32_t expected_hlen, hd_len = 0;
1126e956be96SZiye Yang 	bool plen_error = false;
1127e956be96SZiye Yang 
112882e4bfd3SZiye Yang 	pdu = tqpair->recv_pdu;
1129e956be96SZiye Yang 
11302172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type);
1131ea65bf61SBen Walker 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
1132e956be96SZiye Yang 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1133e956be96SZiye Yang 			SPDK_ERRLOG("Already received an IC_RESP PDU, rejecting duplicate pdu=%p\n", pdu);
1134e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1135e956be96SZiye Yang 			goto err;
1136e956be96SZiye Yang 		}
1137e956be96SZiye Yang 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
1138ea65bf61SBen Walker 		if (pdu->hdr.common.plen != expected_hlen) {
1139e956be96SZiye Yang 			plen_error = true;
1140e956be96SZiye Yang 		}
1141e956be96SZiye Yang 	} else {
1142b49fa72bSJim Harris 		if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) {
1143cc6920a4SJosh Soref 			SPDK_ERRLOG("The tqpair connection has not completed NVMe/TCP negotiation\n");
1144e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1145e956be96SZiye Yang 			goto err;
1146e956be96SZiye Yang 		}
1147e956be96SZiye Yang 
1148ea65bf61SBen Walker 		switch (pdu->hdr.common.pdu_type) {
1149e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1150e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
1151ea65bf61SBen Walker 			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
1152e956be96SZiye Yang 				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
1153e956be96SZiye Yang 			}
1154e956be96SZiye Yang 
1155ea65bf61SBen Walker 			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
1156e956be96SZiye Yang 				plen_error = true;
1157e956be96SZiye Yang 			}
1158e956be96SZiye Yang 			break;
1159e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1160e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
1161ea65bf61SBen Walker 			if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
1162e956be96SZiye Yang 				plen_error = true;
1163e956be96SZiye Yang 			}
1164e956be96SZiye Yang 			break;
1165e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1166e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1167ea65bf61SBen Walker 			if ((pdu->hdr.common.plen <= expected_hlen) ||
1168ea65bf61SBen Walker 			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1169e956be96SZiye Yang 				plen_error = true;
1170e956be96SZiye Yang 			}
1171e956be96SZiye Yang 			break;
1172e956be96SZiye Yang 		case SPDK_NVME_TCP_PDU_TYPE_R2T:
1173e956be96SZiye Yang 			expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
1174ea65bf61SBen Walker 			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
1175e956be96SZiye Yang 				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
1176e956be96SZiye Yang 			}
1177e956be96SZiye Yang 
1178ea65bf61SBen Walker 			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
1179e956be96SZiye Yang 				plen_error = true;
1180e956be96SZiye Yang 			}
1181e956be96SZiye Yang 			break;
1182e956be96SZiye Yang 
1183e956be96SZiye Yang 		default:
118482e4bfd3SZiye Yang 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
1185e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1186e956be96SZiye Yang 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1187e956be96SZiye Yang 			goto err;
1188e956be96SZiye Yang 		}
1189e956be96SZiye Yang 	}
1190e956be96SZiye Yang 
1191ea65bf61SBen Walker 	if (pdu->hdr.common.hlen != expected_hlen) {
1192e956be96SZiye Yang 		SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
1193ea65bf61SBen Walker 			    expected_hlen, pdu->hdr.common.hlen);
1194e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1195e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1196e956be96SZiye Yang 		goto err;
1197e956be96SZiye Yang 
1198e956be96SZiye Yang 	} else if (plen_error) {
1199e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1200e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1201e956be96SZiye Yang 		goto err;
1202e956be96SZiye Yang 	} else {
1203e956be96SZiye Yang 		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
120482e4bfd3SZiye Yang 		nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable);
1205e956be96SZiye Yang 		return;
1206e956be96SZiye Yang 	}
1207e956be96SZiye Yang err:
1208e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1209e956be96SZiye Yang }
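
/*
 * Summary of the common-header validation above. "hdgst" is the 4-byte
 * header digest, counted only when SPDK_NVME_TCP_CH_FLAGS_HDGSTF is set:
 *
 *   PDU type      expected hlen                        plen requirement
 *   IC_RESP       sizeof(spdk_nvme_tcp_ic_resp)        plen == hlen
 *   CAPSULE_RESP  sizeof(spdk_nvme_tcp_rsp)            plen == hlen + hdgst
 *   C2H_DATA      sizeof(spdk_nvme_tcp_c2h_data_hdr)   plen >= pdo
 *   C2H_TERM_REQ  sizeof(spdk_nvme_tcp_term_req_hdr)   hlen < plen <= max
 *   R2T           sizeof(spdk_nvme_tcp_r2t_hdr)        plen == hlen + hdgst
 *
 * Any mismatch produces an H2C term req carrying the failing field's offset.
 */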
1210e956be96SZiye Yang 
1211e956be96SZiye Yang static struct nvme_tcp_req *
1212e956be96SZiye Yang get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
1213e956be96SZiye Yang {
1214e956be96SZiye Yang 	assert(tqpair != NULL);
1215e956be96SZiye Yang 	if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
1216e956be96SZiye Yang 		return NULL;
1217e956be96SZiye Yang 	}
1218e956be96SZiye Yang 
1219e956be96SZiye Yang 	return &tqpair->tcp_reqs[cid];
1220e956be96SZiye Yang }
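
/*
 * This lookup is O(1) because the transport assigns each command a cid equal
 * to its slot in tqpair->tcp_reqs[]; e.g. a CapsuleResp carrying cid 7 maps
 * straight to &tqpair->tcp_reqs[7], with the bounds and FREE-state checks
 * guarding against a cid echoed back incorrectly by the target.
 */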
1221e956be96SZiye Yang 
1222e956be96SZiye Yang static void
1223603f998aSKonrad Sztyber nvme_tcp_recv_payload_seq_cb(void *cb_arg, int status)
1224603f998aSKonrad Sztyber {
1225603f998aSKonrad Sztyber 	struct nvme_tcp_req *treq = cb_arg;
1226603f998aSKonrad Sztyber 	struct nvme_request *req = treq->req;
1227603f998aSKonrad Sztyber 	struct nvme_tcp_qpair *tqpair = treq->tqpair;
1228603f998aSKonrad Sztyber 
1229b5c6199eSKonrad Sztyber 	assert(treq->ordering.bits.in_progress_accel);
1230b5c6199eSKonrad Sztyber 	treq->ordering.bits.in_progress_accel = 0;
1231b5c6199eSKonrad Sztyber 
12328531a41fSJacek Kalwas 	nvme_tcp_cond_schedule_qpair_polling(tqpair);
1233603f998aSKonrad Sztyber 
1234603f998aSKonrad Sztyber 	req->accel_sequence = NULL;
1235603f998aSKonrad Sztyber 	if (spdk_unlikely(status != 0)) {
12369ccef490SJacek Kalwas 		pdu_seq_fail(treq->pdu, status);
12379ccef490SJacek Kalwas 		return;
1238603f998aSKonrad Sztyber 	}
1239603f998aSKonrad Sztyber 
1240603f998aSKonrad Sztyber 	nvme_tcp_req_complete_safe(treq);
1241603f998aSKonrad Sztyber }
1242603f998aSKonrad Sztyber 
1243603f998aSKonrad Sztyber static void
1244e956be96SZiye Yang nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
1245e956be96SZiye Yang 				 struct nvme_tcp_pdu *pdu, uint32_t *reaped)
1246e956be96SZiye Yang {
1247e956be96SZiye Yang 	struct nvme_tcp_req *tcp_req;
1248603f998aSKonrad Sztyber 	struct nvme_tcp_poll_group *tgroup;
1249e956be96SZiye Yang 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
1250e956be96SZiye Yang 	uint8_t flags;
1251e956be96SZiye Yang 
125283ffb207SBen Walker 	tcp_req = pdu->req;
1253e956be96SZiye Yang 	assert(tcp_req != NULL);
1254e956be96SZiye Yang 
12552172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter\n");
1256ea65bf61SBen Walker 	c2h_data = &pdu->hdr.c2h_data;
1257e956be96SZiye Yang 	tcp_req->datao += pdu->data_len;
1258e956be96SZiye Yang 	flags = c2h_data->common.flags;
1259e956be96SZiye Yang 
12600b51da14SZiye Yang 	if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) {
1261e956be96SZiye Yang 		if (tcp_req->datao == tcp_req->req->payload_size) {
12627388e54dSAlexey Marchuk 			tcp_req->rsp.status.p = 0;
1263e956be96SZiye Yang 		} else {
12647388e54dSAlexey Marchuk 			tcp_req->rsp.status.p = 1;
1265e956be96SZiye Yang 		}
1266e956be96SZiye Yang 
12677388e54dSAlexey Marchuk 		tcp_req->rsp.cid = tcp_req->cid;
12687388e54dSAlexey Marchuk 		tcp_req->rsp.sqid = tqpair->qpair.id;
12690b51da14SZiye Yang 		if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
12707388e54dSAlexey Marchuk 			tcp_req->ordering.bits.data_recv = 1;
1271603f998aSKonrad Sztyber 			if (tcp_req->req->accel_sequence != NULL) {
1272603f998aSKonrad Sztyber 				tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1273603f998aSKonrad Sztyber 				nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
1274b5c6199eSKonrad Sztyber 				nvme_tcp_accel_finish_sequence(tgroup, tcp_req,
1275b5c6199eSKonrad Sztyber 							       tcp_req->req->accel_sequence,
1276603f998aSKonrad Sztyber 							       nvme_tcp_recv_payload_seq_cb,
1277603f998aSKonrad Sztyber 							       tcp_req);
1278603f998aSKonrad Sztyber 				return;
1279603f998aSKonrad Sztyber 			}
1280603f998aSKonrad Sztyber 
12817388e54dSAlexey Marchuk 			if (nvme_tcp_req_complete_safe(tcp_req)) {
1282e956be96SZiye Yang 				(*reaped)++;
12834c9aad02SZiye Yang 			}
1284e956be96SZiye Yang 		}
1285e956be96SZiye Yang 	}
12860b51da14SZiye Yang }
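
/*
 * Note on the SUCCESS flag handling above: when the target sets both
 * LAST_PDU and SUCCESS it will not send a separate CapsuleResp for this
 * command, so the host synthesizes the completion itself (filling in cid and
 * sqid from the request) either inline or, when an accel sequence is
 * attached, from nvme_tcp_recv_payload_seq_cb() once the sequence finishes.
 */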
1287e956be96SZiye Yang 
1288e956be96SZiye Yang static const char *spdk_nvme_tcp_term_req_fes_str[] = {
1289e956be96SZiye Yang 	"Invalid PDU Header Field",
1290e956be96SZiye Yang 	"PDU Sequence Error",
1291e956be96SZiye Yang 	"Header Digest Error",
1292e956be96SZiye Yang 	"Data Transfer Out of Range",
1293e956be96SZiye Yang 	"Data Transfer Limit Exceeded",
1294e956be96SZiye Yang 	"Unsupported parameter",
1295e956be96SZiye Yang };
1296e956be96SZiye Yang 
1297e956be96SZiye Yang static void
1298e956be96SZiye Yang nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
1299e956be96SZiye Yang {
1300e956be96SZiye Yang 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
1301e956be96SZiye Yang 		    spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
1302e956be96SZiye Yang 	if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1303e956be96SZiye Yang 	    (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
13042172c432STomasz Zawadzki 		SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n",
1305e956be96SZiye Yang 			      DGET32(c2h_term_req->fei));
1306e956be96SZiye Yang 	}
1307e956be96SZiye Yang 	/* we may also need to dump some other info here */
1308e956be96SZiye Yang }
1309e956be96SZiye Yang 
1310e956be96SZiye Yang static void
1311e956be96SZiye Yang nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
1312e956be96SZiye Yang 				     struct nvme_tcp_pdu *pdu)
1313e956be96SZiye Yang {
1314ea65bf61SBen Walker 	nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
131578df9be4SBen Walker 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1316e956be96SZiye Yang }
1317e956be96SZiye Yang 
1318e956be96SZiye Yang static void
1319bcbccf8bSZiye Yang _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
1320bcbccf8bSZiye Yang {
1321bcbccf8bSZiye Yang 	struct nvme_tcp_pdu *pdu;
1322bcbccf8bSZiye Yang 
1323bcbccf8bSZiye Yang 	assert(tqpair != NULL);
1324bcbccf8bSZiye Yang 	pdu = tqpair->recv_pdu;
1325bcbccf8bSZiye Yang 
1326bcbccf8bSZiye Yang 	switch (pdu->hdr.common.pdu_type) {
1327bcbccf8bSZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1328bcbccf8bSZiye Yang 		nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
13299ab0ffccSZiye Yang 		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1330bcbccf8bSZiye Yang 		break;
1331bcbccf8bSZiye Yang 
1332bcbccf8bSZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1333bcbccf8bSZiye Yang 		nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
1334bcbccf8bSZiye Yang 		break;
1335bcbccf8bSZiye Yang 
1336bcbccf8bSZiye Yang 	default:
1337bcbccf8bSZiye Yang 		/* This branch should be unreachable */
1338bcbccf8bSZiye Yang 		SPDK_ERRLOG("Unexpected PDU type %u in payload handler\n", pdu->hdr.common.pdu_type);
1339bcbccf8bSZiye Yang 		break;
1340bcbccf8bSZiye Yang 	}
1341bcbccf8bSZiye Yang }
1342bcbccf8bSZiye Yang 
1343bcbccf8bSZiye Yang static void
13441561615dSKonrad Sztyber nvme_tcp_req_copy_pdu(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
13451561615dSKonrad Sztyber {
13461561615dSKonrad Sztyber 	treq->pdu->hdr = pdu->hdr;
13471561615dSKonrad Sztyber 	treq->pdu->req = treq;
13481561615dSKonrad Sztyber 	memcpy(treq->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest));
13491561615dSKonrad Sztyber 	memcpy(treq->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt);
13501561615dSKonrad Sztyber 	treq->pdu->data_iovcnt = pdu->data_iovcnt;
13511561615dSKonrad Sztyber 	treq->pdu->data_len = pdu->data_len;
13521561615dSKonrad Sztyber }
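
/*
 * The copy above is what allows digest offload to run asynchronously: the
 * qpair immediately recycles its shared recv_pdu for the next incoming PDU,
 * so the request keeps a private snapshot (header, data iovs, received
 * digest) in treq->pdu for the crc32c operation to consume later.
 */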
13531561615dSKonrad Sztyber 
13546ae3f677SKonrad Sztyber static void
13556ae3f677SKonrad Sztyber nvme_tcp_accel_seq_recv_compute_crc32_done(void *cb_arg)
13566ae3f677SKonrad Sztyber {
13576ae3f677SKonrad Sztyber 	struct nvme_tcp_req *treq = cb_arg;
13586ae3f677SKonrad Sztyber 	struct nvme_tcp_qpair *tqpair = treq->tqpair;
13596ae3f677SKonrad Sztyber 	struct nvme_tcp_pdu *pdu = treq->pdu;
13606ae3f677SKonrad Sztyber 	bool result;
13616ae3f677SKonrad Sztyber 
13626ae3f677SKonrad Sztyber 	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
13636ae3f677SKonrad Sztyber 	result = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
13646ae3f677SKonrad Sztyber 	if (spdk_unlikely(!result)) {
13656ae3f677SKonrad Sztyber 		SPDK_ERRLOG("data digest error on tqpair=(%p)\n", tqpair);
13666ae3f677SKonrad Sztyber 		treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
13676ae3f677SKonrad Sztyber 	}
13686ae3f677SKonrad Sztyber }
13696ae3f677SKonrad Sztyber 
1370d662413eSKonrad Sztyber static bool
1371d662413eSKonrad Sztyber nvme_tcp_accel_recv_compute_crc32(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
1372d662413eSKonrad Sztyber {
1373d662413eSKonrad Sztyber 	struct nvme_tcp_qpair *tqpair = treq->tqpair;
1374d662413eSKonrad Sztyber 	struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1375d662413eSKonrad Sztyber 	struct nvme_request *req = treq->req;
13766ae3f677SKonrad Sztyber 	int rc, dummy = 0;
1377d662413eSKonrad Sztyber 
1378d662413eSKonrad Sztyber 	/* Only support the limited case where the request consists of a single C2H PDU */
1379d662413eSKonrad Sztyber 	if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
1380d662413eSKonrad Sztyber 			  tqpair->qpair.poll_group == NULL || pdu->dif_ctx != NULL ||
1381d662413eSKonrad Sztyber 			  pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0 ||
1382d662413eSKonrad Sztyber 			  pdu->data_len != req->payload_size)) {
1383d662413eSKonrad Sztyber 		return false;
1384d662413eSKonrad Sztyber 	}
1385d662413eSKonrad Sztyber 
13860439dfafSKonrad Sztyber 	if (tgroup->group.group->accel_fn_table.append_crc32c == NULL) {
13870439dfafSKonrad Sztyber 		return false;
13880439dfafSKonrad Sztyber 	}
13890439dfafSKonrad Sztyber 
13906ae3f677SKonrad Sztyber 	nvme_tcp_req_copy_pdu(treq, pdu);
13916ae3f677SKonrad Sztyber 	rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
13926ae3f677SKonrad Sztyber 					  &treq->pdu->data_digest_crc32,
13936ae3f677SKonrad Sztyber 					  treq->pdu->data_iov, treq->pdu->data_iovcnt, 0,
13946ae3f677SKonrad Sztyber 					  nvme_tcp_accel_seq_recv_compute_crc32_done, treq);
13956ae3f677SKonrad Sztyber 	if (spdk_unlikely(rc != 0)) {
13966ae3f677SKonrad Sztyber 		/* If accel is out of resources, fall back to non-accelerated crc32 */
13976ae3f677SKonrad Sztyber 		if (rc == -ENOMEM) {
1398d662413eSKonrad Sztyber 			return false;
1399d662413eSKonrad Sztyber 		}
1400d662413eSKonrad Sztyber 
14016ae3f677SKonrad Sztyber 		SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
14026ae3f677SKonrad Sztyber 		treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
14036ae3f677SKonrad Sztyber 	}
14046ae3f677SKonrad Sztyber 
14057f08d977SKonrad Sztyber 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
14066ae3f677SKonrad Sztyber 	nvme_tcp_c2h_data_payload_handle(tqpair, treq->pdu, &dummy);
1407d662413eSKonrad Sztyber 
14080439dfafSKonrad Sztyber 	return true;
14096ae3f677SKonrad Sztyber }
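
/*
 * In the offloaded path above the digest check itself is deferred: the
 * appended crc32c step writes into treq->pdu->data_digest_crc32, and
 * nvme_tcp_accel_seq_recv_compute_crc32_done() applies the final
 * SPDK_CRC32C_XOR inversion and compares against the received digest word,
 * mirroring the synchronous fallback in nvme_tcp_pdu_payload_handle() below.
 */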
14106ae3f677SKonrad Sztyber 
14119ab0ffccSZiye Yang static void
1412e956be96SZiye Yang nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
1413e956be96SZiye Yang 			    uint32_t *reaped)
1414e956be96SZiye Yang {
1415e956be96SZiye Yang 	int rc = 0;
14161f3bd08fSKonrad Sztyber 	struct nvme_tcp_pdu *pdu = tqpair->recv_pdu;
1417252430a0SZiye Yang 	uint32_t crc32c;
14181f3bd08fSKonrad Sztyber 	struct nvme_tcp_req *tcp_req = pdu->req;
1419e956be96SZiye Yang 
1420e956be96SZiye Yang 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
14212172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter\n");
1422e956be96SZiye Yang 
14231f3bd08fSKonrad Sztyber 	/* The request can be NULL, e.g. in case of C2HTermReq */
14241f3bd08fSKonrad Sztyber 	if (spdk_likely(tcp_req != NULL)) {
142534c901e3SZiye Yang 		tcp_req->expected_datao += pdu->data_len;
14261f3bd08fSKonrad Sztyber 	}
142734c901e3SZiye Yang 
1428e956be96SZiye Yang 	/* Check the data digest if needed */
142958739014SZiye Yang 	if (pdu->ddgst_enable) {
14301f3bd08fSKonrad Sztyber 		/* But if the data digest is enabled, tcp_req cannot be NULL */
14311f3bd08fSKonrad Sztyber 		assert(tcp_req != NULL);
1432d662413eSKonrad Sztyber 		if (nvme_tcp_accel_recv_compute_crc32(tcp_req, pdu)) {
14339ab0ffccSZiye Yang 			return;
14349ab0ffccSZiye Yang 		}
14359ab0ffccSZiye Yang 
1436e956be96SZiye Yang 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1437bb33310aSMengjinWu 		crc32c = crc32c ^ SPDK_CRC32C_XOR;
1438e956be96SZiye Yang 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1439e956be96SZiye Yang 		if (rc == 0) {
1440e956be96SZiye Yang 			SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1441252430a0SZiye Yang 			tcp_req = pdu->req;
1442252430a0SZiye Yang 			assert(tcp_req != NULL);
1443252430a0SZiye Yang 			tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1444e956be96SZiye Yang 		}
1445e956be96SZiye Yang 	}
1446e956be96SZiye Yang 
1447bcbccf8bSZiye Yang 	_nvme_tcp_pdu_payload_handle(tqpair, reaped);
1448e956be96SZiye Yang }
1449e956be96SZiye Yang 
1450e956be96SZiye Yang static void
1451e956be96SZiye Yang nvme_tcp_send_icreq_complete(void *cb_arg)
1452e956be96SZiye Yang {
14532ceff364SAlexey Marchuk 	struct nvme_tcp_qpair *tqpair = cb_arg;
14542ceff364SAlexey Marchuk 
1455d296fcd8SAlexey Marchuk 	SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);
14562ceff364SAlexey Marchuk 
14572ceff364SAlexey Marchuk 	tqpair->flags.icreq_send_ack = true;
14582ceff364SAlexey Marchuk 
14592ceff364SAlexey Marchuk 	if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
1460cc6920a4SJosh Soref 		SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
1461b49fa72bSJim Harris 		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
14622ceff364SAlexey Marchuk 	}
1463e956be96SZiye Yang }
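
/*
 * Note: the target's ICResp can be processed before the socket reports the
 * ICReq write as acknowledged, so connect finalization is split between this
 * callback and nvme_tcp_icresp_handle(); whichever of the two runs second
 * advances the qpair to FABRIC_CONNECT_SEND.
 */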
1464e956be96SZiye Yang 
1465e956be96SZiye Yang static void
1466e956be96SZiye Yang nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
1467e956be96SZiye Yang 		       struct nvme_tcp_pdu *pdu)
1468e956be96SZiye Yang {
1469ea65bf61SBen Walker 	struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
1470e956be96SZiye Yang 	uint32_t error_offset = 0;
1471e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
14720accbe8aSBen Walker 	int recv_buf_size;
1473e956be96SZiye Yang 
1474e956be96SZiye Yang 	/* Only PFV 0 is defined currently */
1475e956be96SZiye Yang 	if (ic_resp->pfv != 0) {
1476e956be96SZiye Yang 		SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
1477e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1478e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
1479e956be96SZiye Yang 		goto end;
1480e956be96SZiye Yang 	}
1481e956be96SZiye Yang 
1482e956be96SZiye Yang 	if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
1483e956be96SZiye Yang 		SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
1484e956be96SZiye Yang 			    ic_resp->maxh2cdata);
1485e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1486e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
1487e956be96SZiye Yang 		goto end;
1488e956be96SZiye Yang 	}
1489e956be96SZiye Yang 	tqpair->maxh2cdata = ic_resp->maxh2cdata;
1490e956be96SZiye Yang 
1491e956be96SZiye Yang 	if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
1492e956be96SZiye Yang 		SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
1493e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1494e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
1495e956be96SZiye Yang 		goto end;
1496e956be96SZiye Yang 	}
1497e956be96SZiye Yang 	tqpair->cpda = ic_resp->cpda;
1498e956be96SZiye Yang 
1499a85579d8SAlexey Marchuk 	tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
1500a85579d8SAlexey Marchuk 	tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
15012172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
15022172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);
1503e956be96SZiye Yang 
15040accbe8aSBen Walker 	/* Now that we know whether digests are enabled, properly size the receive buffer to
15051da44e06SZiye Yang 	 * handle several incoming 4K read commands according to the
15061da44e06SZiye Yang 	 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR parameter. */
15071da44e06SZiye Yang 	recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
15080accbe8aSBen Walker 
1509a85579d8SAlexey Marchuk 	if (tqpair->flags.host_hdgst_enable) {
15100accbe8aSBen Walker 		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
15110accbe8aSBen Walker 	}
15120accbe8aSBen Walker 
1513a85579d8SAlexey Marchuk 	if (tqpair->flags.host_ddgst_enable) {
15140accbe8aSBen Walker 		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
15150accbe8aSBen Walker 	}
15160accbe8aSBen Walker 
15171da44e06SZiye Yang 	if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
15180accbe8aSBen Walker 		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
15190accbe8aSBen Walker 			     tqpair,
15200accbe8aSBen Walker 			     recv_buf_size);
15210accbe8aSBen Walker 		/* Not fatal. */
15220accbe8aSBen Walker 	}
15230accbe8aSBen Walker 
1524e956be96SZiye Yang 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1525d296fcd8SAlexey Marchuk 
1526d296fcd8SAlexey Marchuk 	if (!tqpair->flags.icreq_send_ack) {
1527d296fcd8SAlexey Marchuk 		tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
1528d296fcd8SAlexey Marchuk 		SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id);
1529d296fcd8SAlexey Marchuk 		return;
1530d296fcd8SAlexey Marchuk 	}
1531d296fcd8SAlexey Marchuk 
1532b49fa72bSJim Harris 	tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
1533e956be96SZiye Yang 	return;
1534e956be96SZiye Yang end:
1535e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1536e956be96SZiye Yang }
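
/*
 * Worked sizing example (factor value assumed; SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR
 * comes from the shared nvme_tcp header): with both digests negotiated,
 * recv_buf_size = 0x1000 + sizeof(c2h_data_hdr) + 4 + 4 bytes per command, and
 * a factor of 8 would ask the kernel for a socket buffer large enough to stage
 * eight in-flight 4K C2H transfers at once.
 */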
1537e956be96SZiye Yang 
1538e956be96SZiye Yang static void
1539e956be96SZiye Yang nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1540e956be96SZiye Yang 				 uint32_t *reaped)
1541e956be96SZiye Yang {
1542e956be96SZiye Yang 	struct nvme_tcp_req *tcp_req;
1543603f998aSKonrad Sztyber 	struct nvme_tcp_poll_group *tgroup;
1544ea65bf61SBen Walker 	struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
1545e956be96SZiye Yang 	uint32_t cid, error_offset = 0;
1546e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1547e956be96SZiye Yang 
15482172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter\n");
15497388e54dSAlexey Marchuk 	cid = capsule_resp->rccqe.cid;
1550e956be96SZiye Yang 	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
15517388e54dSAlexey Marchuk 
1552e956be96SZiye Yang 	if (!tcp_req) {
1553e956be96SZiye Yang 		SPDK_ERRLOG("no tcp_req found with cid=%u for tqpair=%p\n", cid, tqpair);
1554e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1555e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
1556e956be96SZiye Yang 		goto end;
1557e956be96SZiye Yang 	}
1558e956be96SZiye Yang 
15597388e54dSAlexey Marchuk 	assert(tcp_req->req != NULL);
15607388e54dSAlexey Marchuk 
15617388e54dSAlexey Marchuk 	tcp_req->rsp = capsule_resp->rccqe;
15627388e54dSAlexey Marchuk 	tcp_req->ordering.bits.data_recv = 1;
15637388e54dSAlexey Marchuk 
15647388e54dSAlexey Marchuk 	/* Prepare to receive the next PDU */
15657388e54dSAlexey Marchuk 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
15667388e54dSAlexey Marchuk 
1567603f998aSKonrad Sztyber 	if (tcp_req->req->accel_sequence != NULL) {
1568603f998aSKonrad Sztyber 		tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1569603f998aSKonrad Sztyber 		nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
1570b5c6199eSKonrad Sztyber 		nvme_tcp_accel_finish_sequence(tgroup, tcp_req, tcp_req->req->accel_sequence,
1571b5c6199eSKonrad Sztyber 					       nvme_tcp_recv_payload_seq_cb, tcp_req);
1572603f998aSKonrad Sztyber 		return;
1573603f998aSKonrad Sztyber 	}
1574603f998aSKonrad Sztyber 
15757388e54dSAlexey Marchuk 	if (nvme_tcp_req_complete_safe(tcp_req)) {
1576e956be96SZiye Yang 		(*reaped)++;
1577cb98b2abSZiye Yang 	}
1578e956be96SZiye Yang 
1579e956be96SZiye Yang 	return;
1580e956be96SZiye Yang 
1581e956be96SZiye Yang end:
1582e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1583e956be96SZiye Yang }
1584e956be96SZiye Yang 
1585e956be96SZiye Yang static void
1586e956be96SZiye Yang nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
1587e956be96SZiye Yang 				 struct nvme_tcp_pdu *pdu)
1588e956be96SZiye Yang {
1589ea65bf61SBen Walker 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
1590e956be96SZiye Yang 	uint32_t error_offset = 0;
1591e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1592e956be96SZiye Yang 
1593e956be96SZiye Yang 	if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1594cc6920a4SJosh Soref 		SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu);
1595e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1596e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1597e956be96SZiye Yang 		goto end;
1598e956be96SZiye Yang 	}
1599e956be96SZiye Yang 
1600e956be96SZiye Yang 	/* set the data buffer */
1601ea65bf61SBen Walker 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
16028f3b4a3aSZiye Yang 			      c2h_term_req->common.plen - c2h_term_req->common.hlen);
1603e956be96SZiye Yang 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1604e956be96SZiye Yang 	return;
1605e956be96SZiye Yang end:
1606e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1607e956be96SZiye Yang }
1608e956be96SZiye Yang 
1609e956be96SZiye Yang static void
1610e956be96SZiye Yang nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
1611e956be96SZiye Yang {
1612e956be96SZiye Yang 	struct nvme_tcp_req *tcp_req;
1613ea65bf61SBen Walker 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
1614e956be96SZiye Yang 	uint32_t error_offset = 0;
1615e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
16160b51da14SZiye Yang 	int flags = c2h_data->common.flags;
1617769fc6deSJacek Kalwas 	int rc;
1618e956be96SZiye Yang 
16192172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter\n");
16202172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
1621e956be96SZiye Yang 		      tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
1622e956be96SZiye Yang 	tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
1623e956be96SZiye Yang 	if (!tcp_req) {
1624e956be96SZiye Yang 		SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
1625e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1626e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
1627e956be96SZiye Yang 		goto end;
1628e956be96SZiye Yang 
1629e956be96SZiye Yang 	}
1630e956be96SZiye Yang 
163134c901e3SZiye Yang 	SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
163234c901e3SZiye Yang 		      tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);
1633e956be96SZiye Yang 
16340b51da14SZiye Yang 	if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
16350b51da14SZiye Yang 			  !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
16360b51da14SZiye Yang 		SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data);
16370b51da14SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
16380b51da14SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common);
16390b51da14SZiye Yang 		goto end;
16400b51da14SZiye Yang 	}
16410b51da14SZiye Yang 
1642e956be96SZiye Yang 	if (c2h_data->datal > tcp_req->req->payload_size) {
1643e956be96SZiye Yang 		SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
1644e956be96SZiye Yang 			    tcp_req, c2h_data->datal, tcp_req->req->payload_size);
1645e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1646e956be96SZiye Yang 		goto end;
1647e956be96SZiye Yang 	}
1648e956be96SZiye Yang 
164934c901e3SZiye Yang 	if (tcp_req->expected_datao != c2h_data->datao) {
165034c901e3SZiye Yang 		SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u) in tcp_req\n",
165134c901e3SZiye Yang 			    tcp_req, c2h_data->datao, tcp_req->expected_datao);
1652e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1653e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
1654e956be96SZiye Yang 		goto end;
1655e956be96SZiye Yang 	}
1656e956be96SZiye Yang 
1657e956be96SZiye Yang 	if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
1658e956be96SZiye Yang 		SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > payload_size(%u) in tcp_req\n",
1659e956be96SZiye Yang 			    tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
1660e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1661e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
1662e956be96SZiye Yang 		goto end;
1663e956be96SZiye Yang 
1664e956be96SZiye Yang 	}
1665e956be96SZiye Yang 
1666d5ef62ebSBen Walker 	if (nvme_payload_type(&tcp_req->req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
1667d5ef62ebSBen Walker 		rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
1668769fc6deSJacek Kalwas 	} else {
1669769fc6deSJacek Kalwas 		assert(nvme_payload_type(&tcp_req->req->payload) == NVME_PAYLOAD_TYPE_SGL);
1670769fc6deSJacek Kalwas 		rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
1671769fc6deSJacek Kalwas 	}
1672769fc6deSJacek Kalwas 
1673d5ef62ebSBen Walker 	if (rc) {
1674d5ef62ebSBen Walker 		/* Not the right error message but at least it handles the failure. */
1675d5ef62ebSBen Walker 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
1676d5ef62ebSBen Walker 		goto end;
1677d5ef62ebSBen Walker 	}
1678d5ef62ebSBen Walker 
1679a7b6d2efSShuhei Matsumoto 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
1680a7b6d2efSShuhei Matsumoto 				  c2h_data->datao, c2h_data->datal);
168183ffb207SBen Walker 	pdu->req = tcp_req;
1682e956be96SZiye Yang 
1683e956be96SZiye Yang 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1684e956be96SZiye Yang 	return;
1685e956be96SZiye Yang 
1686e956be96SZiye Yang end:
1687e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1688e956be96SZiye Yang }
1689e956be96SZiye Yang 
1690e956be96SZiye Yang static void
1691e956be96SZiye Yang nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
1692e956be96SZiye Yang {
1693e956be96SZiye Yang 	struct nvme_tcp_req *tcp_req = cb_arg;
1694e956be96SZiye Yang 
1695e956be96SZiye Yang 	assert(tcp_req != NULL);
1696e956be96SZiye Yang 
1697dc88d131SAlexey Marchuk 	tcp_req->ordering.bits.send_ack = 1;
1698e956be96SZiye Yang 	if (tcp_req->r2tl_remain) {
1699a3f72b2eSSeth Howell 		nvme_tcp_send_h2c_data(tcp_req);
1700679257dbSZiye Yang 	} else {
1701679257dbSZiye Yang 		assert(tcp_req->active_r2ts > 0);
1702679257dbSZiye Yang 		tcp_req->active_r2ts--;
1703679257dbSZiye Yang 		tcp_req->state = NVME_TCP_REQ_ACTIVE;
17042d4af0c1SAlexey Marchuk 
17052d4af0c1SAlexey Marchuk 		if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
17062d4af0c1SAlexey Marchuk 			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
17072172c432STomasz Zawadzki 			SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
17082d4af0c1SAlexey Marchuk 			assert(tcp_req->active_r2ts > 0);
17092d4af0c1SAlexey Marchuk 			tcp_req->ttag = tcp_req->ttag_r2t_next;
17102d4af0c1SAlexey Marchuk 			tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
17112d4af0c1SAlexey Marchuk 			tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
17122d4af0c1SAlexey Marchuk 			nvme_tcp_send_h2c_data(tcp_req);
17132d4af0c1SAlexey Marchuk 			return;
17142d4af0c1SAlexey Marchuk 		}
17152d4af0c1SAlexey Marchuk 
171699265411SJacek Kalwas 		if (tcp_req->ordering.bits.domain_in_use) {
171708c8ab2bSJacek Kalwas 			spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
171808c8ab2bSJacek Kalwas 							   tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
171908c8ab2bSJacek Kalwas 		}
172008c8ab2bSJacek Kalwas 
1721ceb07eb8SZiye Yang 		/* Also call this to complete the request and free its resources */
17227388e54dSAlexey Marchuk 		nvme_tcp_req_complete_safe(tcp_req);
1723e956be96SZiye Yang 	}
1724e956be96SZiye Yang }
1725e956be96SZiye Yang 
1726e956be96SZiye Yang static void
1727a3f72b2eSSeth Howell nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
1728e956be96SZiye Yang {
1729e956be96SZiye Yang 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
1730e956be96SZiye Yang 	struct nvme_tcp_pdu *rsp_pdu;
1731e956be96SZiye Yang 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1732e956be96SZiye Yang 	uint32_t plen, pdo, alignment;
1733e956be96SZiye Yang 
1734e7c92b24SAlexey Marchuk 	/* Reinit the send_ack and h2c_send_waiting_ack bits */
1735dc88d131SAlexey Marchuk 	tcp_req->ordering.bits.send_ack = 0;
1736e7c92b24SAlexey Marchuk 	tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
17372250abaeSZiye Yang 	rsp_pdu = tcp_req->pdu;
1738e956be96SZiye Yang 	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
1739e16aabddSKonrad Sztyber 	rsp_pdu->req = tcp_req;
1740ea65bf61SBen Walker 	h2c_data = &rsp_pdu->hdr.h2c_data;
1741e956be96SZiye Yang 
1742e956be96SZiye Yang 	h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
1743e956be96SZiye Yang 	plen = h2c_data->common.hlen = sizeof(*h2c_data);
1744e956be96SZiye Yang 	h2c_data->cccid = tcp_req->cid;
1745e956be96SZiye Yang 	h2c_data->ttag = tcp_req->ttag;
1746e956be96SZiye Yang 	h2c_data->datao = tcp_req->datao;
1747e956be96SZiye Yang 
1748e956be96SZiye Yang 	h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
1749a7b6d2efSShuhei Matsumoto 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
1750a7b6d2efSShuhei Matsumoto 				  h2c_data->datao, h2c_data->datal);
1751e956be96SZiye Yang 	tcp_req->r2tl_remain -= h2c_data->datal;
1752e956be96SZiye Yang 
1753a85579d8SAlexey Marchuk 	if (tqpair->flags.host_hdgst_enable) {
1754e956be96SZiye Yang 		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1755e956be96SZiye Yang 		plen += SPDK_NVME_TCP_DIGEST_LEN;
1756e956be96SZiye Yang 	}
1757e956be96SZiye Yang 
1758e956be96SZiye Yang 	rsp_pdu->padding_len = 0;
1759e956be96SZiye Yang 	pdo = plen;
1760e956be96SZiye Yang 	if (tqpair->cpda) {
1761e956be96SZiye Yang 		alignment = (tqpair->cpda + 1) << 2;
1762e956be96SZiye Yang 		if (alignment > plen) {
1763e956be96SZiye Yang 			rsp_pdu->padding_len = alignment - plen;
1764e956be96SZiye Yang 			pdo = plen = alignment;
1765e956be96SZiye Yang 		}
1766e956be96SZiye Yang 	}
1767e956be96SZiye Yang 
1768e956be96SZiye Yang 	h2c_data->common.pdo = pdo;
1769e956be96SZiye Yang 	plen += h2c_data->datal;
1770a85579d8SAlexey Marchuk 	if (tqpair->flags.host_ddgst_enable) {
1771e956be96SZiye Yang 		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
1772e956be96SZiye Yang 		plen += SPDK_NVME_TCP_DIGEST_LEN;
1773e956be96SZiye Yang 	}
1774e956be96SZiye Yang 
1775e956be96SZiye Yang 	h2c_data->common.plen = plen;
1776e956be96SZiye Yang 	tcp_req->datao += h2c_data->datal;
1777e956be96SZiye Yang 	if (!tcp_req->r2tl_remain) {
1778e956be96SZiye Yang 		h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
1779e956be96SZiye Yang 	}
1780e956be96SZiye Yang 
17812172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
1782e956be96SZiye Yang 		      h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);
1783e956be96SZiye Yang 
1784e956be96SZiye Yang 	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
1785e956be96SZiye Yang }
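
/*
 * Worked padding example for the pdo computation above (assuming the 24-byte
 * H2C data header defined by the NVMe/TCP spec): with cpda = 7 the payload
 * must start on a (7 + 1) << 2 = 32-byte boundary. Without a header digest,
 * plen is 24 when the padding is computed, so padding_len = 32 - 24 = 8 and
 * the payload begins at pdo = 32.
 */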
1786e956be96SZiye Yang 
1787e956be96SZiye Yang static void
1788e956be96SZiye Yang nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
1789e956be96SZiye Yang {
1790e956be96SZiye Yang 	struct nvme_tcp_req *tcp_req;
1791ea65bf61SBen Walker 	struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
1792e956be96SZiye Yang 	uint32_t cid, error_offset = 0;
1793e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1794e956be96SZiye Yang 
17952172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter\n");
1796e956be96SZiye Yang 	cid = r2t->cccid;
1797e956be96SZiye Yang 	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
1798e956be96SZiye Yang 	if (!tcp_req) {
1799e956be96SZiye Yang 		SPDK_ERRLOG("Cannot find tcp_req with cid=%u for tqpair=%p\n", cid, tqpair);
1800e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1801e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
1802e956be96SZiye Yang 		goto end;
1803e956be96SZiye Yang 	}
1804e956be96SZiye Yang 
18052172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
1806e956be96SZiye Yang 		      tqpair);
1807e956be96SZiye Yang 
1808679257dbSZiye Yang 	if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
1809679257dbSZiye Yang 		assert(tcp_req->active_r2ts == 0);
1810679257dbSZiye Yang 		tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
1811679257dbSZiye Yang 	}
1812679257dbSZiye Yang 
1813e956be96SZiye Yang 	if (tcp_req->datao != r2t->r2to) {
1814e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1815e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
1816e956be96SZiye Yang 		goto end;
1817e956be96SZiye Yang 
1818e956be96SZiye Yang 	}
1819e956be96SZiye Yang 
1820e956be96SZiye Yang 	if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
1821e956be96SZiye Yang 		SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
1822e956be96SZiye Yang 			    tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
1823e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1824e956be96SZiye Yang 		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
1825e956be96SZiye Yang 		goto end;
18262d4af0c1SAlexey Marchuk 	}
1827e956be96SZiye Yang 
18282d4af0c1SAlexey Marchuk 	tcp_req->active_r2ts++;
18292d4af0c1SAlexey Marchuk 	if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
18302d4af0c1SAlexey Marchuk 		if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
18312d4af0c1SAlexey Marchuk 			/* We received a subsequent R2T while waiting for the H2C transfer to complete */
18322172c432STomasz Zawadzki 			SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
18332d4af0c1SAlexey Marchuk 			assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
18342d4af0c1SAlexey Marchuk 			tcp_req->ttag_r2t_next = r2t->ttag;
18352d4af0c1SAlexey Marchuk 			tcp_req->r2tl_remain_next = r2t->r2tl;
18362d4af0c1SAlexey Marchuk 			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
18372d4af0c1SAlexey Marchuk 			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
18382d4af0c1SAlexey Marchuk 			return;
18392d4af0c1SAlexey Marchuk 		} else {
18402d4af0c1SAlexey Marchuk 			fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
18412d4af0c1SAlexey Marchuk 			SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
18422d4af0c1SAlexey Marchuk 				    tqpair);
18432d4af0c1SAlexey Marchuk 			goto end;
18442d4af0c1SAlexey Marchuk 		}
1845e956be96SZiye Yang 	}
1846e956be96SZiye Yang 
1847e956be96SZiye Yang 	tcp_req->ttag = r2t->ttag;
1848e956be96SZiye Yang 	tcp_req->r2tl_remain = r2t->r2tl;
1849e956be96SZiye Yang 	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1850e956be96SZiye Yang 
1851dc88d131SAlexey Marchuk 	if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
1852a3f72b2eSSeth Howell 		nvme_tcp_send_h2c_data(tcp_req);
1853e7c92b24SAlexey Marchuk 	} else {
1854e7c92b24SAlexey Marchuk 		tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
1855449dee35SZiye Yang 	}
1856e7c92b24SAlexey Marchuk 
1857e956be96SZiye Yang 	return;
1858e956be96SZiye Yang 
1859e956be96SZiye Yang end:
1860e956be96SZiye Yang 	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1861e956be96SZiye Yang 
1862e956be96SZiye Yang }
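
/*
 * Putting the R2T handling together (sizes assumed for illustration): for a
 * 16 KiB write with maxh2cdata = 8 KiB, the target sends R2T(r2to=0,
 * r2tl=16K); nvme_tcp_send_h2c_data() then emits an 8 KiB H2C PDU, and its
 * send-complete callback re-arms it for the remaining 8 KiB because
 * r2tl_remain is still non-zero. A second R2T arriving before that H2C
 * finishes is parked via r2t_waiting_h2c_complete rather than rejected.
 */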
1863e956be96SZiye Yang 
1864e956be96SZiye Yang static void
1865e956be96SZiye Yang nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
1866e956be96SZiye Yang {
1867e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
1868e956be96SZiye Yang 	int rc;
1869e956be96SZiye Yang 	uint32_t crc32c, error_offset = 0;
1870e956be96SZiye Yang 	enum spdk_nvme_tcp_term_req_fes fes;
1871e956be96SZiye Yang 
1872e956be96SZiye Yang 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
187382e4bfd3SZiye Yang 	pdu = tqpair->recv_pdu;
1874e956be96SZiye Yang 
18752172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
1876e956be96SZiye Yang 	/* check header digest if needed */
1877e956be96SZiye Yang 	if (pdu->has_hdgst) {
1878e956be96SZiye Yang 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1879ea65bf61SBen Walker 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1880e956be96SZiye Yang 		if (rc == 0) {
1881e956be96SZiye Yang 			SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1882e956be96SZiye Yang 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1883e956be96SZiye Yang 			nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1884e956be96SZiye Yang 			return;
1885e956be96SZiye Yang 
1886e956be96SZiye Yang 		}
1887e956be96SZiye Yang 	}
1888e956be96SZiye Yang 
1889ea65bf61SBen Walker 	switch (pdu->hdr.common.pdu_type) {
1890e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
1891e956be96SZiye Yang 		nvme_tcp_icresp_handle(tqpair, pdu);
1892e956be96SZiye Yang 		break;
1893e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1894e956be96SZiye Yang 		nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
1895e956be96SZiye Yang 		break;
1896e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1897e956be96SZiye Yang 		nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
1898e956be96SZiye Yang 		break;
1899e956be96SZiye Yang 
1900e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1901e956be96SZiye Yang 		nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
1902e956be96SZiye Yang 		break;
1903e956be96SZiye Yang 	case SPDK_NVME_TCP_PDU_TYPE_R2T:
1904e956be96SZiye Yang 		nvme_tcp_r2t_hdr_handle(tqpair, pdu);
1905e956be96SZiye Yang 		break;
1906e956be96SZiye Yang 
1907e956be96SZiye Yang 	default:
190882e4bfd3SZiye Yang 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
1909e956be96SZiye Yang 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1910e956be96SZiye Yang 		error_offset = 1;
1911e956be96SZiye Yang 		nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1912e956be96SZiye Yang 		break;
1913e956be96SZiye Yang 	}
1914e956be96SZiye Yang 
1915e956be96SZiye Yang }
1916e956be96SZiye Yang 
1917e956be96SZiye Yang static int
191851ae6d40SSzulik, Maciej nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions)
1919e956be96SZiye Yang {
1920e956be96SZiye Yang 	int rc = 0;
1921e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
1922e956be96SZiye Yang 	uint32_t data_len;
1923e956be96SZiye Yang 	enum nvme_tcp_pdu_recv_state prev_state;
1924e956be96SZiye Yang 
192573b02ffdSBen Walker 	*reaped = tqpair->async_complete;
192673b02ffdSBen Walker 	tqpair->async_complete = 0;
192773b02ffdSBen Walker 
1928e956be96SZiye Yang 	/* The loop here is to allow for several back-to-back state changes. */
1929e956be96SZiye Yang 	do {
193073b02ffdSBen Walker 		if (*reaped >= max_completions) {
193173b02ffdSBen Walker 			break;
193273b02ffdSBen Walker 		}
193373b02ffdSBen Walker 
1934e956be96SZiye Yang 		prev_state = tqpair->recv_state;
193548312019SMengjinWu 		pdu = tqpair->recv_pdu;
1936e956be96SZiye Yang 		switch (tqpair->recv_state) {
1937e956be96SZiye Yang 		/* If in a new state */
1938e956be96SZiye Yang 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
193948312019SMengjinWu 			memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
1940e956be96SZiye Yang 			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1941e956be96SZiye Yang 			break;
19420b7f5a57SMengjinWu 		/* Wait for the pdu common header */
1943e956be96SZiye Yang 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
19440b7f5a57SMengjinWu 			assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
1945e956be96SZiye Yang 			rc = nvme_tcp_read_data(tqpair->sock,
1946e956be96SZiye Yang 						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1947ea65bf61SBen Walker 						(uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
1948e956be96SZiye Yang 			if (rc < 0) {
194978df9be4SBen Walker 				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1950e956be96SZiye Yang 				break;
1951e956be96SZiye Yang 			}
1952e956be96SZiye Yang 			pdu->ch_valid_bytes += rc;
1953e956be96SZiye Yang 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
195473b02ffdSBen Walker 				return NVME_TCP_PDU_IN_PROGRESS;
1955e956be96SZiye Yang 			}
1956e956be96SZiye Yang 
1957e956be96SZiye Yang 			/* The command header of this PDU has now been read from the socket. */
1958e956be96SZiye Yang 			nvme_tcp_pdu_ch_handle(tqpair);
1959e956be96SZiye Yang 			break;
1960e956be96SZiye Yang 		/* Wait for the pdu specific header */
1961e956be96SZiye Yang 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1962e4569bd4SMengjinWu 			assert(pdu->psh_valid_bytes < pdu->psh_len);
1963e956be96SZiye Yang 			rc = nvme_tcp_read_data(tqpair->sock,
196473d9cef8SZiye Yang 						pdu->psh_len - pdu->psh_valid_bytes,
1965ea65bf61SBen Walker 						(uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
1966e956be96SZiye Yang 			if (rc < 0) {
196778df9be4SBen Walker 				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1968e956be96SZiye Yang 				break;
1969e956be96SZiye Yang 			}
1970e956be96SZiye Yang 
1971e956be96SZiye Yang 			pdu->psh_valid_bytes += rc;
197273d9cef8SZiye Yang 			if (pdu->psh_valid_bytes < pdu->psh_len) {
197373b02ffdSBen Walker 				return NVME_TCP_PDU_IN_PROGRESS;
1974e956be96SZiye Yang 			}
1975e956be96SZiye Yang 
197634edd9f1SKamil Godzwon 			/* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
1977e956be96SZiye Yang 			nvme_tcp_pdu_psh_handle(tqpair, reaped);
1978e956be96SZiye Yang 			break;
1979e956be96SZiye Yang 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1980e956be96SZiye Yang 			/* If there is no payload data to read, keep waiting */
19814ee4023aSZiye Yang 			if (!pdu->data_len) {
1982e956be96SZiye Yang 				return NVME_TCP_PDU_IN_PROGRESS;
1983e956be96SZiye Yang 			}
1984e956be96SZiye Yang 
1985e956be96SZiye Yang 			data_len = pdu->data_len;
1986e956be96SZiye Yang 			/* data digest */
1987ea65bf61SBen Walker 			if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
1988a85579d8SAlexey Marchuk 					  tqpair->flags.host_ddgst_enable)) {
198958739014SZiye Yang 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
199058739014SZiye Yang 				pdu->ddgst_enable = true;
199158739014SZiye Yang 			}
199258739014SZiye Yang 
199358739014SZiye Yang 			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
1994e956be96SZiye Yang 			if (rc < 0) {
199578df9be4SBen Walker 				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1996e956be96SZiye Yang 				break;
1997e956be96SZiye Yang 			}
1998e956be96SZiye Yang 
199974542baeSAlexey Marchuk 			pdu->rw_offset += rc;
200074542baeSAlexey Marchuk 			if (pdu->rw_offset < data_len) {
200173b02ffdSBen Walker 				return NVME_TCP_PDU_IN_PROGRESS;
2002e956be96SZiye Yang 			}
2003e956be96SZiye Yang 
200474542baeSAlexey Marchuk 			assert(pdu->rw_offset == data_len);
2005e956be96SZiye Yang 			/* All of this PDU has now been read from the socket. */
2006e956be96SZiye Yang 			nvme_tcp_pdu_payload_handle(tqpair, reaped);
2007e956be96SZiye Yang 			break;
200878df9be4SBen Walker 		case NVME_TCP_PDU_RECV_STATE_QUIESCING:
200978df9be4SBen Walker 			if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
20108564f005SKonrad Sztyber 				if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) {
20118564f005SKonrad Sztyber 					nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair);
20128564f005SKonrad Sztyber 				}
20138564f005SKonrad Sztyber 
201478df9be4SBen Walker 				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
201578df9be4SBen Walker 			}
201678df9be4SBen Walker 			break;
2017e956be96SZiye Yang 		case NVME_TCP_PDU_RECV_STATE_ERROR:
201848312019SMengjinWu 			memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
201973b02ffdSBen Walker 			return NVME_TCP_PDU_FATAL;
2020e956be96SZiye Yang 		default:
2021e956be96SZiye Yang 			assert(0);
2022e956be96SZiye Yang 			break;
2023e956be96SZiye Yang 		}
202473b02ffdSBen Walker 	} while (prev_state != tqpair->recv_state);
2025e956be96SZiye Yang 
202673b02ffdSBen Walker 	return rc > 0 ? 0 : rc;
2027e956be96SZiye Yang }
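
/*
 * Receive state machine summary, in the order a well-formed C2H data PDU
 * flows through nvme_tcp_read_pdu():
 *
 *   AWAIT_PDU_READY   -> AWAIT_PDU_CH       (recv_pdu zeroed)
 *   AWAIT_PDU_CH      -> AWAIT_PDU_PSH      (8-byte common header read)
 *   AWAIT_PDU_PSH     -> AWAIT_PDU_PAYLOAD  (PDU-specific header read)
 *   AWAIT_PDU_PAYLOAD -> AWAIT_PDU_READY    (payload and data digest read)
 *
 * Socket errors route to QUIESCING, which waits for outstanding requests to
 * drain before parking the qpair in ERROR; partial reads return
 * NVME_TCP_PDU_IN_PROGRESS and resume on the next poll.
 */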
2028e956be96SZiye Yang 
2029d4875ed8SZiye Yang static void
2030d4875ed8SZiye Yang nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
2031d4875ed8SZiye Yang {
2032d4875ed8SZiye Yang 	uint64_t t02;
2033d4875ed8SZiye Yang 	struct nvme_tcp_req *tcp_req, *tmp;
2034d4875ed8SZiye Yang 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2035d4875ed8SZiye Yang 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
2036d4875ed8SZiye Yang 	struct spdk_nvme_ctrlr_process *active_proc;
2037d4875ed8SZiye Yang 
2038d4875ed8SZiye Yang 	/* Don't check timeouts during controller initialization. */
2039d4875ed8SZiye Yang 	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
2040d4875ed8SZiye Yang 		return;
2041d4875ed8SZiye Yang 	}
2042d4875ed8SZiye Yang 
2043d4875ed8SZiye Yang 	if (nvme_qpair_is_admin_queue(qpair)) {
20441a9c19a9SSeth Howell 		active_proc = nvme_ctrlr_get_current_process(ctrlr);
2045d4875ed8SZiye Yang 	} else {
2046d4875ed8SZiye Yang 		active_proc = qpair->active_proc;
2047d4875ed8SZiye Yang 	}
2048d4875ed8SZiye Yang 
2049d4875ed8SZiye Yang 	/* Only check timeouts if the current process has a timeout callback. */
2050d4875ed8SZiye Yang 	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
2051d4875ed8SZiye Yang 		return;
2052d4875ed8SZiye Yang 	}
2053d4875ed8SZiye Yang 
2054d4875ed8SZiye Yang 	t02 = spdk_get_ticks();
2055d4875ed8SZiye Yang 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2056a0e16aecSLiadOz 		if (ctrlr->is_failed) {
2057a0e16aecSLiadOz 			/* The controller may have been marked failed by one of the nvme_request_check_timeout() callbacks. */
2058a0e16aecSLiadOz 			return;
2059a0e16aecSLiadOz 		}
2060d4875ed8SZiye Yang 		assert(tcp_req->req != NULL);
2061d4875ed8SZiye Yang 
2062d4875ed8SZiye Yang 		if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) {
2063d4875ed8SZiye Yang 			/*
2064d4875ed8SZiye Yang 			 * The requests are in order, so as soon as one has not timed out,
2065d4875ed8SZiye Yang 			 * stop iterating.
2066d4875ed8SZiye Yang 			 */
2067d4875ed8SZiye Yang 			break;
2068d4875ed8SZiye Yang 		}
2069d4875ed8SZiye Yang 	}
2070d4875ed8SZiye Yang }
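
/*
 * active_proc->timeout_cb_fn is only non-NULL when the application has
 * registered a timeout callback (e.g. via
 * spdk_nvme_ctrlr_register_timeout_callback()), so qpairs without one skip
 * the outstanding-request scan above entirely.
 */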
2071d4875ed8SZiye Yang 
20727d32600eSKonrad Sztyber static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
20737d32600eSKonrad Sztyber 		struct spdk_nvme_qpair *qpair);
20747d32600eSKonrad Sztyber 
2075f5bc2cbeSBen Walker static int
2076e956be96SZiye Yang nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
2077e956be96SZiye Yang {
2078e956be96SZiye Yang 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2079e956be96SZiye Yang 	uint32_t reaped;
2080e956be96SZiye Yang 	int rc;
2081e956be96SZiye Yang 
208242b47742SBen Walker 	if (qpair->poll_group == NULL) {
20838e8a5f7cSOr Gerlitz 		rc = spdk_sock_flush(tqpair->sock);
2084739c6d7cSKonrad Sztyber 		if (rc < 0 && errno != EAGAIN) {
208535156582SKonrad Sztyber 			SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair,
208635156582SKonrad Sztyber 				    errno, spdk_strerror(errno));
20875c3360ceSLiadOz 			if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
20885c3360ceSLiadOz 				nvme_tcp_qpair_check_timeout(qpair);
20895c3360ceSLiadOz 			}
20908564f005SKonrad Sztyber 
20918564f005SKonrad Sztyber 			if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
20928564f005SKonrad Sztyber 				if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
20938564f005SKonrad Sztyber 					nvme_transport_ctrlr_disconnect_qpair_done(qpair);
20948564f005SKonrad Sztyber 				}
20958564f005SKonrad Sztyber 
20968564f005SKonrad Sztyber 				/* Don't return errors until the qpair gets disconnected */
20978564f005SKonrad Sztyber 				return 0;
20988564f005SKonrad Sztyber 			}
20998564f005SKonrad Sztyber 
2100ac94b60bSKonrad Sztyber 			goto fail;
2101e956be96SZiye Yang 		}
210242b47742SBen Walker 	}
2103e956be96SZiye Yang 
2104e956be96SZiye Yang 	if (max_completions == 0) {
210573b02ffdSBen Walker 		max_completions = spdk_max(tqpair->num_entries, 1);
2106e956be96SZiye Yang 	} else {
2107e956be96SZiye Yang 		max_completions = spdk_min(max_completions, tqpair->num_entries);
2108e956be96SZiye Yang 	}
2109e956be96SZiye Yang 
2110e956be96SZiye Yang 	reaped = 0;
211151ae6d40SSzulik, Maciej 	rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions);
2112e956be96SZiye Yang 	if (rc < 0) {
21132172c432STomasz Zawadzki 		SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n",
2114e956be96SZiye Yang 			      errno, spdk_strerror(errno));
2115f6646fd9SSeth Howell 		goto fail;
2116e956be96SZiye Yang 	}
2117e956be96SZiye Yang 
2118d4875ed8SZiye Yang 	if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
2119d4875ed8SZiye Yang 		nvme_tcp_qpair_check_timeout(qpair);
2120d4875ed8SZiye Yang 	}
2121d4875ed8SZiye Yang 
21227d32600eSKonrad Sztyber 	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
21237d32600eSKonrad Sztyber 		rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
21247d32600eSKonrad Sztyber 		if (rc != 0 && rc != -EAGAIN) {
21257d32600eSKonrad Sztyber 			SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair);
21267d32600eSKonrad Sztyber 			goto fail;
21277d32600eSKonrad Sztyber 		} else if (rc == 0) {
21287d32600eSKonrad Sztyber 			/* Once the connection is completed, we can submit queued requests */
21297d32600eSKonrad Sztyber 			nvme_qpair_resubmit_requests(qpair, tqpair->num_entries);
21307d32600eSKonrad Sztyber 		}
21317d32600eSKonrad Sztyber 	}
21327d32600eSKonrad Sztyber 
2133e956be96SZiye Yang 	return reaped;
2134f6646fd9SSeth Howell fail:
2135f6646fd9SSeth Howell 
2136f6646fd9SSeth Howell 	/*
2137f6646fd9SSeth Howell 	 * Since admin queues take the ctrlr_lock before entering this function,
21386189c0ceSSeth Howell 	 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
2139f6646fd9SSeth Howell 	 * to call the generic function which will take the lock for us.
2140f6646fd9SSeth Howell 	 */
214124bca2eaSSeth Howell 	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
214224bca2eaSSeth Howell 
2143f6646fd9SSeth Howell 	if (nvme_qpair_is_admin_queue(qpair)) {
21441c733603SArtur Paszkiewicz 		enum nvme_qpair_state state_prev = nvme_qpair_get_state(qpair);
21451c733603SArtur Paszkiewicz 
2146e1c91850SSeth Howell 		nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
21471c733603SArtur Paszkiewicz 
21481c733603SArtur Paszkiewicz 		if (state_prev == NVME_QPAIR_CONNECTING && qpair->poll_status != NULL) {
21491c733603SArtur Paszkiewicz 			/* Needed to free the poll_status */
21501c733603SArtur Paszkiewicz 			nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
21511c733603SArtur Paszkiewicz 		}
2152f6646fd9SSeth Howell 	} else {
2153f6646fd9SSeth Howell 		nvme_ctrlr_disconnect_qpair(qpair);
2154f6646fd9SSeth Howell 	}
2155f6646fd9SSeth Howell 	return -ENXIO;
2156e956be96SZiye Yang }
2157e956be96SZiye Yang 
21585d071852SSeth Howell static void
21595d071852SSeth Howell nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
21605d071852SSeth Howell {
21615d071852SSeth Howell 	struct spdk_nvme_qpair *qpair = ctx;
21625d071852SSeth Howell 	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
21635d071852SSeth Howell 	int32_t num_completions;
21646b86039fSBen Walker 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
21656b86039fSBen Walker 
21666b86039fSBen Walker 	if (tqpair->needs_poll) {
21676b86039fSBen Walker 		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
21686b86039fSBen Walker 		tqpair->needs_poll = false;
21696b86039fSBen Walker 	}
21705d071852SSeth Howell 
21715d071852SSeth Howell 	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
21725d071852SSeth Howell 
21735d071852SSeth Howell 	if (pgroup->num_completions >= 0 && num_completions >= 0) {
21745d071852SSeth Howell 		pgroup->num_completions += num_completions;
2175ea86c035SAlexey Marchuk 		pgroup->stats.nvme_completions += num_completions;
21765d071852SSeth Howell 	} else {
21775d071852SSeth Howell 		pgroup->num_completions = -ENXIO;
21785d071852SSeth Howell 	}
21795d071852SSeth Howell }
21805d071852SSeth Howell 
2181e956be96SZiye Yang static int
2182e956be96SZiye Yang nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
2183e956be96SZiye Yang {
2184e956be96SZiye Yang 	struct spdk_nvme_tcp_ic_req *ic_req;
2185e956be96SZiye Yang 	struct nvme_tcp_pdu *pdu;
2186672710c8SJim Harris 	uint32_t timeout_in_sec;
2187e956be96SZiye Yang 
21887bac9b06SZiye Yang 	pdu = tqpair->send_pdu;
21897bac9b06SZiye Yang 	memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
2190ea65bf61SBen Walker 	ic_req = &pdu->hdr.ic_req;
2191e956be96SZiye Yang 
2192e956be96SZiye Yang 	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
2193e956be96SZiye Yang 	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
2194e956be96SZiye Yang 	ic_req->pfv = 0;
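	/* maxr2t is a 0's based value in the ICReq PDU, so a default of one
	 * outstanding R2T is encoded as 0. */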
2195e956be96SZiye Yang 	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
2196e956be96SZiye Yang 	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;
2197e956be96SZiye Yang 
2198be4fbb21SZiye Yang 	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
2199be4fbb21SZiye Yang 	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;
2200e956be96SZiye Yang 
2201e956be96SZiye Yang 	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);
2202e956be96SZiye Yang 
2203672710c8SJim Harris 	timeout_in_sec = tqpair->qpair.async ? ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC;
2204672710c8SJim Harris 	tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz());
220545d63e98SJim Harris 	return 0;
220645d63e98SJim Harris }
220745d63e98SJim Harris 
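/*
 * Minimal sketch of the deadline arithmetic used above: spdk_get_ticks_hz()
 * returns timestamp-counter ticks per second, so a deadline is simply
 * now + seconds * hz. The helper name is illustrative.
 */
static inline uint64_t
example_deadline_tsc(uint32_t timeout_in_sec)
{
	return spdk_get_ticks() + (uint64_t)timeout_in_sec * spdk_get_ticks_hz();
}
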
220845d63e98SJim Harris static int
2209579a678aSZiye Yang nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
2210e956be96SZiye Yang {
2211e956be96SZiye Yang 	struct sockaddr_storage dst_addr;
2212e956be96SZiye Yang 	struct sockaddr_storage src_addr;
2213e956be96SZiye Yang 	int rc;
2214738b9569SSeth Howell 	struct nvme_tcp_qpair *tqpair;
2215e956be96SZiye Yang 	int family;
22166e98729cSKonrad Sztyber 	long int port, src_port = 0;
221735f7f0ceSBoris Glimcher 	char *sock_impl_name;
22183e98fd06SKrzysztof Karas 	struct spdk_sock_impl_opts impl_opts = {};
221935f7f0ceSBoris Glimcher 	size_t impl_opts_size = sizeof(impl_opts);
222094345a0aSZiye Yang 	struct spdk_sock_opts opts;
22213e98fd06SKrzysztof Karas 	struct nvme_tcp_ctrlr *tcp_ctrlr;
2222e956be96SZiye Yang 
2223738b9569SSeth Howell 	tqpair = nvme_tcp_qpair(qpair);
2224e956be96SZiye Yang 
2225e956be96SZiye Yang 	switch (ctrlr->trid.adrfam) {
2226e956be96SZiye Yang 	case SPDK_NVMF_ADRFAM_IPV4:
2227e956be96SZiye Yang 		family = AF_INET;
2228e956be96SZiye Yang 		break;
2229e956be96SZiye Yang 	case SPDK_NVMF_ADRFAM_IPV6:
2230e956be96SZiye Yang 		family = AF_INET6;
2231e956be96SZiye Yang 		break;
2232e956be96SZiye Yang 	default:
2233e956be96SZiye Yang 		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
2234d4d2e317SZiye Yang 		rc = -1;
2235d4d2e317SZiye Yang 		return rc;
2236e956be96SZiye Yang 	}
2237e956be96SZiye Yang 
22382172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);
2239e956be96SZiye Yang 
2240e956be96SZiye Yang 	memset(&dst_addr, 0, sizeof(dst_addr));
2241e956be96SZiye Yang 
22422172c432STomasz Zawadzki 	SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
2243bd7c9e07SJim Harris 	rc = nvme_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid, &port);
2244e956be96SZiye Yang 	if (rc != 0) {
224501bbc271SJim Harris 		SPDK_ERRLOG("dst_addr nvme_parse_addr() failed\n");
2246d4d2e317SZiye Yang 		return rc;
2247e956be96SZiye Yang 	}
2248e956be96SZiye Yang 
2249e956be96SZiye Yang 	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
2250e956be96SZiye Yang 		memset(&src_addr, 0, sizeof(src_addr));
22516e98729cSKonrad Sztyber 		rc = nvme_parse_addr(&src_addr, family,
22526e98729cSKonrad Sztyber 				     ctrlr->opts.src_addr[0] ? ctrlr->opts.src_addr : NULL,
22536e98729cSKonrad Sztyber 				     ctrlr->opts.src_svcid[0] ? ctrlr->opts.src_svcid : NULL,
22546e98729cSKonrad Sztyber 				     &src_port);
2255e956be96SZiye Yang 		if (rc != 0) {
225601bbc271SJim Harris 			SPDK_ERRLOG("src_addr nvme_parse_addr() failed\n");
2257d4d2e317SZiye Yang 			return rc;
2258e956be96SZiye Yang 		}
2259e956be96SZiye Yang 	}
2260e956be96SZiye Yang 
22617a50a6bcSKrzysztof Karas 	tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
22627a50a6bcSKrzysztof Karas 	sock_impl_name = tcp_ctrlr->psk[0] ? "ssl" : NULL;
226335f7f0ceSBoris Glimcher 	SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);
226435f7f0ceSBoris Glimcher 
22653e98fd06SKrzysztof Karas 	if (sock_impl_name) {
226635f7f0ceSBoris Glimcher 		spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
226735f7f0ceSBoris Glimcher 		impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
22683e98fd06SKrzysztof Karas 		impl_opts.psk_identity = tcp_ctrlr->psk_identity;
22697a50a6bcSKrzysztof Karas 		impl_opts.psk_key = tcp_ctrlr->psk;
22707a50a6bcSKrzysztof Karas 		impl_opts.psk_key_size = tcp_ctrlr->psk_size;
2271169ee6c3SKrzysztof Karas 		impl_opts.tls_cipher_suites = tcp_ctrlr->tls_cipher_suite;
22723e98fd06SKrzysztof Karas 	}
227394345a0aSZiye Yang 	opts.opts_size = sizeof(opts);
227494345a0aSZiye Yang 	spdk_sock_get_default_opts(&opts);
227594345a0aSZiye Yang 	opts.priority = ctrlr->trid.priority;
2276a620cd19SZiye Yang 	opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
22778425f00cSKonrad Sztyber 	opts.src_addr = ctrlr->opts.src_addr[0] ? ctrlr->opts.src_addr : NULL;
22788425f00cSKonrad Sztyber 	opts.src_port = src_port;
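	/* transport_ack_timeout is an exponent: the socket-level ack timeout
	 * configured below scales as 2^n. */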
227931db7b13Szhangduan 	if (ctrlr->opts.transport_ack_timeout) {
228031db7b13Szhangduan 		opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
228131db7b13Szhangduan 	}
228235f7f0ceSBoris Glimcher 	if (sock_impl_name) {
228335f7f0ceSBoris Glimcher 		opts.impl_opts = &impl_opts;
228435f7f0ceSBoris Glimcher 		opts.impl_opts_size = sizeof(impl_opts);
228535f7f0ceSBoris Glimcher 	}
228635f7f0ceSBoris Glimcher 	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
2287e956be96SZiye Yang 	if (!tqpair->sock) {
2288e8207e9dSBen Walker 		SPDK_ERRLOG("failed to connect sock for tqpair=%p, addr=%s, port=%ld\n",
2289e8207e9dSBen Walker 			    tqpair, ctrlr->trid.traddr, port);
2290d4d2e317SZiye Yang 		rc = -1;
2291d4d2e317SZiye Yang 		return rc;
2292e956be96SZiye Yang 	}
2293e956be96SZiye Yang 
2294579a678aSZiye Yang 	return 0;
2295579a678aSZiye Yang }
2296579a678aSZiye Yang 
2297579a678aSZiye Yang static int
229845d63e98SJim Harris nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
229945d63e98SJim Harris {
230045d63e98SJim Harris 	struct nvme_tcp_qpair *tqpair;
230145d63e98SJim Harris 	int rc;
230245d63e98SJim Harris 
230345d63e98SJim Harris 	tqpair = nvme_tcp_qpair(qpair);
230445d63e98SJim Harris 
230517f99bbbSKonrad Sztyber 	/* Prevent this function from being called recursively, as it could lead to issues with
230617f99bbbSKonrad Sztyber 	 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
230717f99bbbSKonrad Sztyber 	 * call.
230817f99bbbSKonrad Sztyber 	 */
230917f99bbbSKonrad Sztyber 	if (tqpair->flags.in_connect_poll) {
231017f99bbbSKonrad Sztyber 		return -EAGAIN;
231117f99bbbSKonrad Sztyber 	}
231217f99bbbSKonrad Sztyber 
231317f99bbbSKonrad Sztyber 	tqpair->flags.in_connect_poll = 1;
231417f99bbbSKonrad Sztyber 
231545d63e98SJim Harris 	switch (tqpair->state) {
231645d63e98SJim Harris 	case NVME_TCP_QPAIR_STATE_INVALID:
231745d63e98SJim Harris 	case NVME_TCP_QPAIR_STATE_INITIALIZING:
23187d32600eSKonrad Sztyber 		if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
231945d63e98SJim Harris 			SPDK_ERRLOG("Timed out waiting for ICResp, failed to construct tqpair=%p\n", tqpair);
23207d32600eSKonrad Sztyber 			rc = -ETIMEDOUT;
23217d32600eSKonrad Sztyber 			break;
232245d63e98SJim Harris 		}
23237d32600eSKonrad Sztyber 		rc = -EAGAIN;
232445d63e98SJim Harris 		break;
2325b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
2326834e3c5aSEvgeniy Kochetov 		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
232745d63e98SJim Harris 		if (rc < 0) {
232845d63e98SJim Harris 			SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
232945d63e98SJim Harris 			break;
233045d63e98SJim Harris 		}
2331b49fa72bSJim Harris 		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
2332b49fa72bSJim Harris 		rc = -EAGAIN;
2333b49fa72bSJim Harris 		break;
2334b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
2335b49fa72bSJim Harris 		rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
2336b49fa72bSJim Harris 		if (rc == 0) {
2337ffd098cfSKonrad Sztyber 			if (nvme_fabric_qpair_auth_required(qpair)) {
2338ffd098cfSKonrad Sztyber 				rc = nvme_fabric_qpair_authenticate_async(qpair);
2339ffd098cfSKonrad Sztyber 				if (rc == 0) {
2340ffd098cfSKonrad Sztyber 					tqpair->state = NVME_TCP_QPAIR_STATE_AUTHENTICATING;
2341ffd098cfSKonrad Sztyber 					rc = -EAGAIN;
2342ffd098cfSKonrad Sztyber 				}
2343ffd098cfSKonrad Sztyber 			} else {
2344b49fa72bSJim Harris 				tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
234545d63e98SJim Harris 				nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
2346ffd098cfSKonrad Sztyber 			}
2347b49fa72bSJim Harris 		} else if (rc != -EAGAIN) {
2348b49fa72bSJim Harris 			SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
2349b49fa72bSJim Harris 		}
2350b49fa72bSJim Harris 		break;
2351ffd098cfSKonrad Sztyber 	case NVME_TCP_QPAIR_STATE_AUTHENTICATING:
2352ffd098cfSKonrad Sztyber 		rc = nvme_fabric_qpair_authenticate_poll(qpair);
2353ffd098cfSKonrad Sztyber 		if (rc == 0) {
2354ffd098cfSKonrad Sztyber 			tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
2355ffd098cfSKonrad Sztyber 			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
2356ffd098cfSKonrad Sztyber 		}
2357ffd098cfSKonrad Sztyber 		break;
2358b49fa72bSJim Harris 	case NVME_TCP_QPAIR_STATE_RUNNING:
2359b49fa72bSJim Harris 		rc = 0;
236045d63e98SJim Harris 		break;
236145d63e98SJim Harris 	default:
236245d63e98SJim Harris 		assert(false);
236345d63e98SJim Harris 		rc = -EINVAL;
236445d63e98SJim Harris 		break;
236545d63e98SJim Harris 	}
236645d63e98SJim Harris 
236717f99bbbSKonrad Sztyber 	tqpair->flags.in_connect_poll = 0;
236845d63e98SJim Harris 	return rc;
236945d63e98SJim Harris }
237045d63e98SJim Harris 
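/*
 * Minimal sketch of the reentrancy guard pattern used by
 * nvme_tcp_ctrlr_connect_qpair_poll() above: a flag set on entry and cleared
 * on exit turns a recursive call into a harmless -EAGAIN. The helper name is
 * illustrative.
 */
static int
example_guarded_poll(struct nvme_tcp_qpair *tqpair)
{
	if (tqpair->flags.in_connect_poll) {
		return -EAGAIN;
	}

	tqpair->flags.in_connect_poll = 1;
	/* ... polling work that might otherwise re-enter this function ... */
	tqpair->flags.in_connect_poll = 0;

	return 0;
}
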
237145d63e98SJim Harris static int
2372579a678aSZiye Yang nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
2373579a678aSZiye Yang {
2374579a678aSZiye Yang 	int rc = 0;
2375579a678aSZiye Yang 	struct nvme_tcp_qpair *tqpair;
2376ea86c035SAlexey Marchuk 	struct nvme_tcp_poll_group *tgroup;
2377579a678aSZiye Yang 
2378579a678aSZiye Yang 	tqpair = nvme_tcp_qpair(qpair);
2379579a678aSZiye Yang 
2380579a678aSZiye Yang 	if (!tqpair->sock) {
2381579a678aSZiye Yang 		rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2382579a678aSZiye Yang 		if (rc < 0) {
2383579a678aSZiye Yang 			return rc;
2384579a678aSZiye Yang 		}
2385579a678aSZiye Yang 	}
2386579a678aSZiye Yang 
2387bc36528cSAlexey Marchuk 	if (qpair->poll_group) {
2388bc36528cSAlexey Marchuk 		rc = nvme_poll_group_connect_qpair(qpair);
2389bc36528cSAlexey Marchuk 		if (rc) {
2390bc36528cSAlexey Marchuk 			SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
2391bc36528cSAlexey Marchuk 			return rc;
2392bc36528cSAlexey Marchuk 		}
2393ea86c035SAlexey Marchuk 		tgroup = nvme_tcp_poll_group(qpair->poll_group);
2394ea86c035SAlexey Marchuk 		tqpair->stats = &tgroup->stats;
2395f1941efeSShuhei Matsumoto 		tqpair->shared_stats = true;
2396ea86c035SAlexey Marchuk 	} else {
2397df4600f4SRichael Zhuang 		/* When resetting a controller, we disconnect the adminq and then reconnect.
2398df4600f4SRichael Zhuang 		 * The stats are not freed on disconnect, so don't allocate the memory
2399df4600f4SRichael Zhuang 		 * again when reconnecting.
2400df4600f4SRichael Zhuang 		 */
2401df4600f4SRichael Zhuang 		if (tqpair->stats == NULL) {
2402ea86c035SAlexey Marchuk 			tqpair->stats = calloc(1, sizeof(*tqpair->stats));
2403ea86c035SAlexey Marchuk 			if (!tqpair->stats) {
2404ea86c035SAlexey Marchuk 				SPDK_ERRLOG("tcp stats memory allocation failed\n");
2405ea86c035SAlexey Marchuk 				return -ENOMEM;
2406ea86c035SAlexey Marchuk 			}
2407bc36528cSAlexey Marchuk 		}
2408df4600f4SRichael Zhuang 	}
2409bc36528cSAlexey Marchuk 
2410804ca3e9SZiye Yang 	tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
2411fe2dddbbSZiye Yang 	/* Explicitly set the state and recv_state of tqpair */
2412fe2dddbbSZiye Yang 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2413fe2dddbbSZiye Yang 	if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
2414fe2dddbbSZiye Yang 		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2415fe2dddbbSZiye Yang 	}
2416e956be96SZiye Yang 	rc = nvme_tcp_qpair_icreq_send(tqpair);
2417e956be96SZiye Yang 	if (rc != 0) {
2418e956be96SZiye Yang 		SPDK_ERRLOG("Failed to send an ICReq for tqpair=%p\n", tqpair);
2419d4d2e317SZiye Yang 		return rc;
2420e956be96SZiye Yang 	}
2421e956be96SZiye Yang 
242245d63e98SJim Harris 	return rc;
2423e956be96SZiye Yang }
2424e956be96SZiye Yang 
2425e956be96SZiye Yang static struct spdk_nvme_qpair *
2426e956be96SZiye Yang nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
2427e956be96SZiye Yang 			    uint16_t qid, uint32_t qsize,
2428e956be96SZiye Yang 			    enum spdk_nvme_qprio qprio,
2429681f5e6eSKonrad Sztyber 			    uint32_t num_requests, bool async)
2430e956be96SZiye Yang {
2431e956be96SZiye Yang 	struct nvme_tcp_qpair *tqpair;
2432e956be96SZiye Yang 	struct spdk_nvme_qpair *qpair;
2433e956be96SZiye Yang 	int rc;
2434e956be96SZiye Yang 
2435834e3c5aSEvgeniy Kochetov 	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
2436834e3c5aSEvgeniy Kochetov 		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
2437834e3c5aSEvgeniy Kochetov 			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
2438834e3c5aSEvgeniy Kochetov 		return NULL;
2439834e3c5aSEvgeniy Kochetov 	}
2440834e3c5aSEvgeniy Kochetov 
2441e956be96SZiye Yang 	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
2442e956be96SZiye Yang 	if (!tqpair) {
2443e956be96SZiye Yang 		SPDK_ERRLOG("failed to allocate tqpair\n");
2444e956be96SZiye Yang 		return NULL;
2445e956be96SZiye Yang 	}
2446e956be96SZiye Yang 
2447834e3c5aSEvgeniy Kochetov 	/* Set num_entries to one less than the queue size. According to the NVMe
2448834e3c5aSEvgeniy Kochetov 	 * and NVMe-oF specs, we cannot submit queue-size requests at once:
2449834e3c5aSEvgeniy Kochetov 	 * one slot must always remain empty.
2450834e3c5aSEvgeniy Kochetov 	 */
2451834e3c5aSEvgeniy Kochetov 	tqpair->num_entries = qsize - 1;
2452e956be96SZiye Yang 	qpair = &tqpair->qpair;
2453681f5e6eSKonrad Sztyber 	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
2454e956be96SZiye Yang 	if (rc != 0) {
24555ecc5ef1SMaciej Szwed 		free(tqpair);
2456e956be96SZiye Yang 		return NULL;
2457e956be96SZiye Yang 	}
2458e956be96SZiye Yang 
2459a5c54a1fSJim Harris 	rc = nvme_tcp_alloc_reqs(tqpair);
2460a5c54a1fSJim Harris 	if (rc) {
2461738b9569SSeth Howell 		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2462a5c54a1fSJim Harris 		return NULL;
2463a5c54a1fSJim Harris 	}
2464a5c54a1fSJim Harris 
2465579a678aSZiye Yang 	/* spdk_nvme_qpair_get_optimal_poll_group() needs socket information,
2466579a678aSZiye Yang 	 * so create the socket as part of creating the qpair. */
2467579a678aSZiye Yang 	rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2468579a678aSZiye Yang 	if (rc) {
2469579a678aSZiye Yang 		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2470579a678aSZiye Yang 		return NULL;
2471579a678aSZiye Yang 	}
2472579a678aSZiye Yang 
2473e956be96SZiye Yang 	return qpair;
2474e956be96SZiye Yang }
2475e956be96SZiye Yang 
2476f5bc2cbeSBen Walker static struct spdk_nvme_qpair *
2477e956be96SZiye Yang nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
2478e956be96SZiye Yang 			       const struct spdk_nvme_io_qpair_opts *opts)
2479e956be96SZiye Yang {
2480e956be96SZiye Yang 	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
2481681f5e6eSKonrad Sztyber 					   opts->io_queue_requests, opts->async_mode);
2482e956be96SZiye Yang }
2483e956be96SZiye Yang 
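/*
 * Hedged application-side sketch: the opts consumed by
 * nvme_tcp_ctrlr_create_io_qpair() above originate from
 * spdk_nvme_ctrlr_alloc_io_qpair(). The queue size shown is illustrative.
 */
static struct spdk_nvme_qpair *
example_alloc_tcp_io_qpair(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_io_qpair_opts opts;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	opts.io_queue_size = 128;	/* transport keeps qsize - 1 usable entries */
	opts.async_mode = true;		/* connect completes via process_completions */

	return spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
}
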
24847a50a6bcSKrzysztof Karas static int
24857a50a6bcSKrzysztof Karas nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
24867a50a6bcSKrzysztof Karas {
2487e628e991SKonrad Sztyber 	struct spdk_nvme_ctrlr *ctrlr = &tctrlr->ctrlr;
24887a50a6bcSKrzysztof Karas 	int rc;
2489e036416bSKrzysztof Karas 	uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};
2490572a0c8aSKrzysztof Karas 	uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
2491a6e805f5SKonrad Sztyber 	uint8_t pskbuf[SPDK_TLS_PSK_MAX_LEN + 1] = {};
2492572a0c8aSKrzysztof Karas 	uint8_t tls_cipher_suite;
2493572a0c8aSKrzysztof Karas 	uint8_t psk_retained_hash;
2494572a0c8aSKrzysztof Karas 	uint64_t psk_configured_size;
24957a50a6bcSKrzysztof Karas 
2496a6e805f5SKonrad Sztyber 	rc = spdk_key_get_key(ctrlr->opts.tls_psk, pskbuf, SPDK_TLS_PSK_MAX_LEN);
2497a6e805f5SKonrad Sztyber 	if (rc < 0) {
2498a6e805f5SKonrad Sztyber 		SPDK_ERRLOG("Failed to obtain key '%s': %s\n",
2499a6e805f5SKonrad Sztyber 			    spdk_key_get_name(ctrlr->opts.tls_psk), spdk_strerror(-rc));
2500a6e805f5SKonrad Sztyber 		goto finish;
2501a6e805f5SKonrad Sztyber 	}
2502a6e805f5SKonrad Sztyber 
2503d341bee7SKonrad Sztyber 	rc = nvme_tcp_parse_interchange_psk(pskbuf, psk_configured, sizeof(psk_configured),
2504572a0c8aSKrzysztof Karas 					    &psk_configured_size, &psk_retained_hash);
2505572a0c8aSKrzysztof Karas 	if (rc < 0) {
2506572a0c8aSKrzysztof Karas 		SPDK_ERRLOG("Failed to parse PSK interchange!\n");
2507572a0c8aSKrzysztof Karas 		goto finish;
2508572a0c8aSKrzysztof Karas 	}
2509572a0c8aSKrzysztof Karas 
2510572a0c8aSKrzysztof Karas 	/* The Base64 string encodes the configured PSK (32 or 48 bytes binary).
2511572a0c8aSKrzysztof Karas 	 * This check also ensures that psk_configured_size is smaller than
2512572a0c8aSKrzysztof Karas 	 * psk_retained buffer size. */
2513572a0c8aSKrzysztof Karas 	if (psk_configured_size == SHA256_DIGEST_LENGTH) {
2514572a0c8aSKrzysztof Karas 		tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
2515169ee6c3SKrzysztof Karas 		tctrlr->tls_cipher_suite = "TLS_AES_128_GCM_SHA256";
2516572a0c8aSKrzysztof Karas 	} else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
2517572a0c8aSKrzysztof Karas 		tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
2518572a0c8aSKrzysztof Karas 		tctrlr->tls_cipher_suite = "TLS_AES_256_GCM_SHA384";
2519572a0c8aSKrzysztof Karas 	} else {
2520572a0c8aSKrzysztof Karas 		SPDK_ERRLOG("Unrecognized cipher suite!\n");
2521572a0c8aSKrzysztof Karas 		rc = -ENOTSUP;
2522572a0c8aSKrzysztof Karas 		goto finish;
2523572a0c8aSKrzysztof Karas 	}
2524169ee6c3SKrzysztof Karas 
25257a50a6bcSKrzysztof Karas 	rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
2526e628e991SKonrad Sztyber 					    ctrlr->opts.hostnqn, ctrlr->trid.subnqn,
2527572a0c8aSKrzysztof Karas 					    tls_cipher_suite);
25287a50a6bcSKrzysztof Karas 	if (rc) {
25297a50a6bcSKrzysztof Karas 		SPDK_ERRLOG("could not generate PSK identity\n");
2530572a0c8aSKrzysztof Karas 		goto finish;
25317a50a6bcSKrzysztof Karas 	}
25327a50a6bcSKrzysztof Karas 
2533572a0c8aSKrzysztof Karas 	/* No hash indicates that Configured PSK must be used as Retained PSK. */
2534572a0c8aSKrzysztof Karas 	if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
2535572a0c8aSKrzysztof Karas 		assert(psk_configured_size < sizeof(psk_retained));
2536572a0c8aSKrzysztof Karas 		memcpy(psk_retained, psk_configured, psk_configured_size);
2537572a0c8aSKrzysztof Karas 		rc = psk_configured_size;
2538572a0c8aSKrzysztof Karas 	} else {
2539572a0c8aSKrzysztof Karas 		/* Derive retained PSK. */
2540e628e991SKonrad Sztyber 		rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, ctrlr->opts.hostnqn,
2541572a0c8aSKrzysztof Karas 						  psk_retained, sizeof(psk_retained), psk_retained_hash);
25427a50a6bcSKrzysztof Karas 		if (rc < 0) {
25437a50a6bcSKrzysztof Karas 			SPDK_ERRLOG("Unable to derive retained PSK!\n");
2544572a0c8aSKrzysztof Karas 			goto finish;
2545572a0c8aSKrzysztof Karas 		}
25467a50a6bcSKrzysztof Karas 	}
2547e036416bSKrzysztof Karas 
2548e036416bSKrzysztof Karas 	rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk,
2549572a0c8aSKrzysztof Karas 				     sizeof(tctrlr->psk), tls_cipher_suite);
2550e036416bSKrzysztof Karas 	if (rc < 0) {
2551e036416bSKrzysztof Karas 		SPDK_ERRLOG("Could not generate TLS PSK!\n");
255283b6f7e4SKonrad Sztyber 		goto finish;
2553e036416bSKrzysztof Karas 	}
2554e036416bSKrzysztof Karas 
25557a50a6bcSKrzysztof Karas 	tctrlr->psk_size = rc;
2556572a0c8aSKrzysztof Karas 	rc = 0;
2557572a0c8aSKrzysztof Karas finish:
2558572a0c8aSKrzysztof Karas 	spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
2559a6e805f5SKonrad Sztyber 	spdk_memset_s(pskbuf, sizeof(pskbuf), 0, sizeof(pskbuf));
2560572a0c8aSKrzysztof Karas 
2561572a0c8aSKrzysztof Karas 	return rc;
25627a50a6bcSKrzysztof Karas }
25637a50a6bcSKrzysztof Karas 
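/*
 * Hedged sketch of how a caller supplies the PSK consumed above: tls_psk is a
 * keyring key reference. The key name "psk0" is illustrative and must already
 * be registered with the keyring; the reference should eventually be released
 * with spdk_keyring_put_key().
 */
static int
example_set_tls_psk(struct spdk_nvme_ctrlr_opts *opts)
{
	opts->tls_psk = spdk_keyring_get_key("psk0");
	if (opts->tls_psk == NULL) {
		SPDK_ERRLOG("key 'psk0' not found in keyring\n");
		return -ENOENT;
	}

	return 0;
}
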
25644c18542eSKonrad Sztyber /* We have to use the typedef in the function declaration to appease astyle. */
25654c18542eSKonrad Sztyber typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
25664c18542eSKonrad Sztyber 
25678dd1cd21SBen Walker static spdk_nvme_ctrlr_t *
25688dd1cd21SBen Walker nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
2569e956be96SZiye Yang 			 const struct spdk_nvme_ctrlr_opts *opts,
2570e956be96SZiye Yang 			 void *devhandle)
2571e956be96SZiye Yang {
2572e956be96SZiye Yang 	struct nvme_tcp_ctrlr *tctrlr;
257370dccebeSJim Harris 	struct nvme_tcp_qpair *tqpair;
2574e956be96SZiye Yang 	int rc;
2575e956be96SZiye Yang 
2576e956be96SZiye Yang 	tctrlr = calloc(1, sizeof(*tctrlr));
2577e956be96SZiye Yang 	if (tctrlr == NULL) {
2578e956be96SZiye Yang 		SPDK_ERRLOG("could not allocate ctrlr\n");
2579e956be96SZiye Yang 		return NULL;
2580e956be96SZiye Yang 	}
2581e956be96SZiye Yang 
2582e956be96SZiye Yang 	tctrlr->ctrlr.opts = *opts;
2583e956be96SZiye Yang 	tctrlr->ctrlr.trid = *trid;
2584e956be96SZiye Yang 
2585d341bee7SKonrad Sztyber 	if (opts->tls_psk != NULL) {
25867a50a6bcSKrzysztof Karas 		rc = nvme_tcp_generate_tls_credentials(tctrlr);
25877a50a6bcSKrzysztof Karas 		if (rc != 0) {
25883e98fd06SKrzysztof Karas 			free(tctrlr);
25893e98fd06SKrzysztof Karas 			return NULL;
25903e98fd06SKrzysztof Karas 		}
25913e98fd06SKrzysztof Karas 	}
25923e98fd06SKrzysztof Karas 
259331db7b13Szhangduan 	if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
259431db7b13Szhangduan 		SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, using max value\n",
259531db7b13Szhangduan 			       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
259631db7b13Szhangduan 		tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
259731db7b13Szhangduan 	}
259831db7b13Szhangduan 
2599e956be96SZiye Yang 	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
2600e956be96SZiye Yang 	if (rc != 0) {
2601b7b2c972SZiye Yang 		free(tctrlr);
2602e956be96SZiye Yang 		return NULL;
2603e956be96SZiye Yang 	}
2604e956be96SZiye Yang 
26051bf3c2feSJacek Kalwas 	/* An accel sequence may be used not only for data digest offload, but also
26061bf3c2feSJacek Kalwas 	 * to handle a potential COPY operation appended as the result of translation. */
2607ed87975aSKonrad Sztyber 	tctrlr->ctrlr.flags |= SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
2608e956be96SZiye Yang 	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
2609daa8f941SJacek Kalwas 			       tctrlr->ctrlr.opts.admin_queue_size, 0,
2610681f5e6eSKonrad Sztyber 			       tctrlr->ctrlr.opts.admin_queue_size, true);
2611e956be96SZiye Yang 	if (!tctrlr->ctrlr.adminq) {
2612e956be96SZiye Yang 		SPDK_ERRLOG("failed to create admin qpair\n");
2613b7b2c972SZiye Yang 		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
2614e956be96SZiye Yang 		return NULL;
2615e956be96SZiye Yang 	}
2616e956be96SZiye Yang 
261770dccebeSJim Harris 	tqpair = nvme_tcp_qpair(tctrlr->ctrlr.adminq);
261870dccebeSJim Harris 	tctrlr->ctrlr.numa.id_valid = 1;
261970dccebeSJim Harris 	tctrlr->ctrlr.numa.id = spdk_sock_get_numa_id(tqpair->sock);
262070dccebeSJim Harris 
262114e1d0c7SZiye Yang 	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
262214e1d0c7SZiye Yang 		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
262314e1d0c7SZiye Yang 		nvme_ctrlr_destruct(&tctrlr->ctrlr);
262414e1d0c7SZiye Yang 		return NULL;
262514e1d0c7SZiye Yang 	}
262614e1d0c7SZiye Yang 
2627e956be96SZiye Yang 	return &tctrlr->ctrlr;
2628e956be96SZiye Yang }
2629e956be96SZiye Yang 
2630f5bc2cbeSBen Walker static uint32_t
2631e956be96SZiye Yang nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
2632e956be96SZiye Yang {
2633cc6920a4SJosh Soref 	/* TCP transport doesn't limit maximum IO transfer size. */
26348b539eb5SShuhei Matsumoto 	return UINT32_MAX;
2635e956be96SZiye Yang }
2636e956be96SZiye Yang 
2637f5bc2cbeSBen Walker static uint16_t
2638e956be96SZiye Yang nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
2639e956be96SZiye Yang {
2640ada93334SAlexey Marchuk 	return NVME_TCP_MAX_SGL_DESCRIPTORS;
2641e956be96SZiye Yang }
2642e956be96SZiye Yang 
2643f2bd635eSShuhei Matsumoto static int
2644f2bd635eSShuhei Matsumoto nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
2645f2bd635eSShuhei Matsumoto 				int (*iter_fn)(struct nvme_request *req, void *arg),
2646f2bd635eSShuhei Matsumoto 				void *arg)
2647f2bd635eSShuhei Matsumoto {
2648f2bd635eSShuhei Matsumoto 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2649f2bd635eSShuhei Matsumoto 	struct nvme_tcp_req *tcp_req, *tmp;
2650f2bd635eSShuhei Matsumoto 	int rc;
2651f2bd635eSShuhei Matsumoto 
2652f2bd635eSShuhei Matsumoto 	assert(iter_fn != NULL);
2653f2bd635eSShuhei Matsumoto 
2654f2bd635eSShuhei Matsumoto 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2655f2bd635eSShuhei Matsumoto 		assert(tcp_req->req != NULL);
2656f2bd635eSShuhei Matsumoto 
2657f2bd635eSShuhei Matsumoto 		rc = iter_fn(tcp_req->req, arg);
2658f2bd635eSShuhei Matsumoto 		if (rc != 0) {
2659f2bd635eSShuhei Matsumoto 			return rc;
2660f2bd635eSShuhei Matsumoto 		}
2661f2bd635eSShuhei Matsumoto 	}
2662f2bd635eSShuhei Matsumoto 
2663f2bd635eSShuhei Matsumoto 	return 0;
2664f2bd635eSShuhei Matsumoto }
2665f2bd635eSShuhei Matsumoto 
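/*
 * Minimal sketch of an iter_fn for the helper above; the counting callback is
 * illustrative. Returning non-zero stops the iteration early.
 */
static int
example_count_outstanding(struct nvme_request *req, void *arg)
{
	uint32_t *count = arg;

	(*count)++;
	return 0;
}
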
26664ddd77b2SKonrad Sztyber static int
26674ddd77b2SKonrad Sztyber nvme_tcp_qpair_authenticate(struct spdk_nvme_qpair *qpair)
26684ddd77b2SKonrad Sztyber {
26694ddd77b2SKonrad Sztyber 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
26704ddd77b2SKonrad Sztyber 	int rc;
26714ddd77b2SKonrad Sztyber 
26724ddd77b2SKonrad Sztyber 	/* If the qpair is still connecting, it'll be forced to authenticate later on */
26734ddd77b2SKonrad Sztyber 	if (tqpair->state < NVME_TCP_QPAIR_STATE_RUNNING) {
26744ddd77b2SKonrad Sztyber 		return 0;
26754ddd77b2SKonrad Sztyber 	} else if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
26764ddd77b2SKonrad Sztyber 		return -ENOTCONN;
26774ddd77b2SKonrad Sztyber 	}
26784ddd77b2SKonrad Sztyber 
26794ddd77b2SKonrad Sztyber 	rc = nvme_fabric_qpair_authenticate_async(qpair);
26804ddd77b2SKonrad Sztyber 	if (rc == 0) {
26814ddd77b2SKonrad Sztyber 		nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTING);
26824ddd77b2SKonrad Sztyber 		tqpair->state = NVME_TCP_QPAIR_STATE_AUTHENTICATING;
26834ddd77b2SKonrad Sztyber 	}
26844ddd77b2SKonrad Sztyber 
26854ddd77b2SKonrad Sztyber 	return rc;
26864ddd77b2SKonrad Sztyber }
26874ddd77b2SKonrad Sztyber 
2688f5bc2cbeSBen Walker static void
2689f366e261SJim Harris nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
2690f366e261SJim Harris {
2691f366e261SJim Harris 	struct nvme_tcp_req *tcp_req, *tmp;
26920b92da6cSGangCao 	struct spdk_nvme_cpl cpl = {};
2693f366e261SJim Harris 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2694f366e261SJim Harris 
2695f366e261SJim Harris 	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
2696f366e261SJim Harris 	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2697f366e261SJim Harris 
2698f366e261SJim Harris 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2699f366e261SJim Harris 		assert(tcp_req->req != NULL);
2700e060285eSShuhei Matsumoto 		if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
27018df52a0fSTomasz Zawadzki 			continue;
27028df52a0fSTomasz Zawadzki 		}
2703f366e261SJim Harris 
27044a24f581SJim Harris 		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
2705f366e261SJim Harris 	}
2706f366e261SJim Harris }
2707e4eef697SSeth Howell 
2708c998c6c6SSeth Howell static struct spdk_nvme_transport_poll_group *
2709c998c6c6SSeth Howell nvme_tcp_poll_group_create(void)
2710c998c6c6SSeth Howell {
2711fe5e1db6SSeth Howell 	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));
2712fe5e1db6SSeth Howell 
2713fe5e1db6SSeth Howell 	if (group == NULL) {
2714fe5e1db6SSeth Howell 		SPDK_ERRLOG("Unable to allocate poll group.\n");
2715c998c6c6SSeth Howell 		return NULL;
2716c998c6c6SSeth Howell 	}
2717c998c6c6SSeth Howell 
27186b86039fSBen Walker 	TAILQ_INIT(&group->needs_poll);
27196b86039fSBen Walker 
27205d071852SSeth Howell 	group->sock_group = spdk_sock_group_create(group);
27215d071852SSeth Howell 	if (group->sock_group == NULL) {
27225d071852SSeth Howell 		free(group);
27235d071852SSeth Howell 		SPDK_ERRLOG("Unable to allocate sock group.\n");
27245d071852SSeth Howell 		return NULL;
27255d071852SSeth Howell 	}
27265d071852SSeth Howell 
2727fe5e1db6SSeth Howell 	return &group->group;
2728fe5e1db6SSeth Howell }
2729fe5e1db6SSeth Howell 
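/*
 * Hedged application-side sketch: the transport poll group created above sits
 * behind the generic spdk_nvme_poll_group API. The callback and helper names
 * are illustrative.
 */
static void
example_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *ctx)
{
	/* Invoked only once the qpair has fully reached the DISCONNECTED state. */
}

static int64_t
example_poll_group_tick(struct spdk_nvme_poll_group *group)
{
	/* completions_per_qpair == 0 lets each qpair reap up to its queue depth. */
	return spdk_nvme_poll_group_process_completions(group, 0,
			example_disconnected_qpair_cb);
}
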
27305206698eSZiye Yang static struct spdk_nvme_transport_poll_group *
27315206698eSZiye Yang nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
27325206698eSZiye Yang {
27335206698eSZiye Yang 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
27345206698eSZiye Yang 	struct spdk_sock_group *group = NULL;
27355206698eSZiye Yang 	int rc;
27365206698eSZiye Yang 
27376301f891STomasz Zawadzki 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
27385206698eSZiye Yang 	if (!rc && group != NULL) {
27395206698eSZiye Yang 		return spdk_sock_group_get_ctx(group);
27405206698eSZiye Yang 	}
27415206698eSZiye Yang 
27425206698eSZiye Yang 	return NULL;
27435206698eSZiye Yang }
27445206698eSZiye Yang 
2745c998c6c6SSeth Howell static int
2746fc86e792SSeth Howell nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
2747c998c6c6SSeth Howell {
27485d071852SSeth Howell 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
27495d071852SSeth Howell 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
27505d071852SSeth Howell 
27515d071852SSeth Howell 	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
27525d071852SSeth Howell 		return -EPROTO;
27535d071852SSeth Howell 	}
2754c998c6c6SSeth Howell 	return 0;
2755c998c6c6SSeth Howell }
2756c998c6c6SSeth Howell 
2757c998c6c6SSeth Howell static int
2758fc86e792SSeth Howell nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
2759c998c6c6SSeth Howell {
27605d071852SSeth Howell 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
27615d071852SSeth Howell 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
27625d071852SSeth Howell 
27636b86039fSBen Walker 	if (tqpair->needs_poll) {
27646b86039fSBen Walker 		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
27656b86039fSBen Walker 		tqpair->needs_poll = false;
27666b86039fSBen Walker 	}
27676b86039fSBen Walker 
27685d071852SSeth Howell 	if (tqpair->sock && group->sock_group) {
27695d071852SSeth Howell 		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
27705d071852SSeth Howell 			return -EPROTO;
27715d071852SSeth Howell 		}
27725d071852SSeth Howell 	}
2773c998c6c6SSeth Howell 	return 0;
2774c998c6c6SSeth Howell }
2775c998c6c6SSeth Howell 
2776c998c6c6SSeth Howell static int
2777c998c6c6SSeth Howell nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
2778c998c6c6SSeth Howell 			struct spdk_nvme_qpair *qpair)
2779c998c6c6SSeth Howell {
27805d071852SSeth Howell 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
27815d071852SSeth Howell 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
27825d071852SSeth Howell 
27835d071852SSeth Howell 	/* disconnected qpairs won't have a sock to add. */
27845d071852SSeth Howell 	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
27855d071852SSeth Howell 		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
27865d071852SSeth Howell 			return -EPROTO;
27875d071852SSeth Howell 		}
27885d071852SSeth Howell 	}
27895d071852SSeth Howell 
2790fe5e1db6SSeth Howell 	return 0;
2791c998c6c6SSeth Howell }
2792c998c6c6SSeth Howell 
2793c998c6c6SSeth Howell static int
2794c998c6c6SSeth Howell nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
2795c998c6c6SSeth Howell 			   struct spdk_nvme_qpair *qpair)
2796c998c6c6SSeth Howell {
2797ea86c035SAlexey Marchuk 	struct nvme_tcp_qpair *tqpair;
2798999f0362SShuhei Matsumoto 	struct nvme_tcp_poll_group *group;
2799ea86c035SAlexey Marchuk 
28004c8ccb54SShuhei Matsumoto 	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);
28015d071852SSeth Howell 
2802ea86c035SAlexey Marchuk 	tqpair = nvme_tcp_qpair(qpair);
2803999f0362SShuhei Matsumoto 	group = nvme_tcp_poll_group(tgroup);
2804f1941efeSShuhei Matsumoto 
2805f1941efeSShuhei Matsumoto 	assert(tqpair->shared_stats == true);
2806f1941efeSShuhei Matsumoto 	tqpair->stats = &g_dummy_stats;
2807ea86c035SAlexey Marchuk 
2808999f0362SShuhei Matsumoto 	if (tqpair->needs_poll) {
2809999f0362SShuhei Matsumoto 		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
2810999f0362SShuhei Matsumoto 		tqpair->needs_poll = false;
2811999f0362SShuhei Matsumoto 	}
2812999f0362SShuhei Matsumoto 
28134c8ccb54SShuhei Matsumoto 	return 0;
2814c998c6c6SSeth Howell }
2815c998c6c6SSeth Howell 
2816c998c6c6SSeth Howell static int64_t
2817c998c6c6SSeth Howell nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
2818fc86e792SSeth Howell 					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
2819c998c6c6SSeth Howell {
28205d071852SSeth Howell 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
2821fe5e1db6SSeth Howell 	struct spdk_nvme_qpair *qpair, *tmp_qpair;
28226b86039fSBen Walker 	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
28237d589976SAlexey Marchuk 	int num_events;
28245d071852SSeth Howell 
28255d071852SSeth Howell 	group->completions_per_qpair = completions_per_qpair;
28265d071852SSeth Howell 	group->num_completions = 0;
2827ea86c035SAlexey Marchuk 	group->stats.polls++;
28285d071852SSeth Howell 
28297d589976SAlexey Marchuk 	num_events = spdk_sock_group_poll(group->sock_group);
2830fe5e1db6SSeth Howell 
2831fe5e1db6SSeth Howell 	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
28328564f005SKonrad Sztyber 		tqpair = nvme_tcp_qpair(qpair);
28338564f005SKonrad Sztyber 		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
28348564f005SKonrad Sztyber 			if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
28358564f005SKonrad Sztyber 				nvme_transport_ctrlr_disconnect_qpair_done(qpair);
28368564f005SKonrad Sztyber 			}
28378564f005SKonrad Sztyber 		}
28388564f005SKonrad Sztyber 		/* Wait until the qpair transitions to the DISCONNECTED state; otherwise the user
28398564f005SKonrad Sztyber 		 * might free it from disconnected_qpair_cb while it's not fully disconnected (and
28408564f005SKonrad Sztyber 		 * might still have outstanding requests). */
28418564f005SKonrad Sztyber 		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) {
2842fe5e1db6SSeth Howell 			disconnected_qpair_cb(qpair, tgroup->group->ctx);
2843fe5e1db6SSeth Howell 		}
28448564f005SKonrad Sztyber 	}
2845fe5e1db6SSeth Howell 
28466b86039fSBen Walker 	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
28476b86039fSBen Walker 	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
28486b86039fSBen Walker 	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
28496b86039fSBen Walker 		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
28506b86039fSBen Walker 	}
28516b86039fSBen Walker 
28527d589976SAlexey Marchuk 	if (spdk_unlikely(num_events < 0)) {
28537d589976SAlexey Marchuk 		return num_events;
28547d589976SAlexey Marchuk 	}
28557d589976SAlexey Marchuk 
2856ea86c035SAlexey Marchuk 	group->stats.idle_polls += !num_events;
2857ea86c035SAlexey Marchuk 	group->stats.socket_completions += num_events;
2858ea86c035SAlexey Marchuk 
28595d071852SSeth Howell 	return group->num_completions;
2860c998c6c6SSeth Howell }
2861c998c6c6SSeth Howell 
2862*1efa1b16SAnkit Kumar /*
2863*1efa1b16SAnkit Kumar  * Handle disconnected qpairs when interrupt support gets added.
2864*1efa1b16SAnkit Kumar  */
2865*1efa1b16SAnkit Kumar static void
2866*1efa1b16SAnkit Kumar nvme_tcp_poll_group_check_disconnected_qpairs(struct spdk_nvme_transport_poll_group *tgroup,
2867*1efa1b16SAnkit Kumar 		spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
2868*1efa1b16SAnkit Kumar {
2869*1efa1b16SAnkit Kumar }
2870*1efa1b16SAnkit Kumar 
2871c998c6c6SSeth Howell static int
2872c998c6c6SSeth Howell nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
2873c998c6c6SSeth Howell {
28745d071852SSeth Howell 	int rc;
28755d071852SSeth Howell 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
28765d071852SSeth Howell 
2877fe5e1db6SSeth Howell 	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
2878fe5e1db6SSeth Howell 		return -EBUSY;
2879fe5e1db6SSeth Howell 	}
2880fe5e1db6SSeth Howell 
28815d071852SSeth Howell 	rc = spdk_sock_group_close(&group->sock_group);
28825d071852SSeth Howell 	if (rc != 0) {
28835d071852SSeth Howell 		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
28845d071852SSeth Howell 		assert(false);
28855d071852SSeth Howell 	}
28865d071852SSeth Howell 
2887fe5e1db6SSeth Howell 	free(tgroup);
2888fe5e1db6SSeth Howell 
2889fe5e1db6SSeth Howell 	return 0;
2890c998c6c6SSeth Howell }
2891c998c6c6SSeth Howell 
2892ea86c035SAlexey Marchuk static int
2893ea86c035SAlexey Marchuk nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
2894ea86c035SAlexey Marchuk 			      struct spdk_nvme_transport_poll_group_stat **_stats)
2895ea86c035SAlexey Marchuk {
2896ea86c035SAlexey Marchuk 	struct nvme_tcp_poll_group *group;
2897ea86c035SAlexey Marchuk 	struct spdk_nvme_transport_poll_group_stat *stats;
2898ea86c035SAlexey Marchuk 
2899ea86c035SAlexey Marchuk 	if (tgroup == NULL || _stats == NULL) {
2900ea86c035SAlexey Marchuk 		SPDK_ERRLOG("Invalid stats or group pointer\n");
2901ea86c035SAlexey Marchuk 		return -EINVAL;
2902ea86c035SAlexey Marchuk 	}
2903ea86c035SAlexey Marchuk 
2904ea86c035SAlexey Marchuk 	group = nvme_tcp_poll_group(tgroup);
2905ea86c035SAlexey Marchuk 
2906ea86c035SAlexey Marchuk 	stats = calloc(1, sizeof(*stats));
2907ea86c035SAlexey Marchuk 	if (!stats) {
2908ea86c035SAlexey Marchuk 		SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
2909ea86c035SAlexey Marchuk 		return -ENOMEM;
2910ea86c035SAlexey Marchuk 	}
2911ea86c035SAlexey Marchuk 	stats->trtype = SPDK_NVME_TRANSPORT_TCP;
2912ea86c035SAlexey Marchuk 	memcpy(&stats->tcp, &group->stats, sizeof(group->stats));
2913ea86c035SAlexey Marchuk 
2914ea86c035SAlexey Marchuk 	*_stats = stats;
2915ea86c035SAlexey Marchuk 
2916ea86c035SAlexey Marchuk 	return 0;
2917ea86c035SAlexey Marchuk }
2918ea86c035SAlexey Marchuk 
2919ea86c035SAlexey Marchuk static void
2920ea86c035SAlexey Marchuk nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
2921ea86c035SAlexey Marchuk 			       struct spdk_nvme_transport_poll_group_stat *stats)
2922ea86c035SAlexey Marchuk {
2923ea86c035SAlexey Marchuk 	free(stats);
2924ea86c035SAlexey Marchuk }
2925ea86c035SAlexey Marchuk 
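/*
 * Hedged usage sketch: stats obtained through the generic poll-group API must
 * be released with the matching free call, mirroring the transport callbacks
 * above. The helper name is illustrative.
 */
static void
example_dump_tcp_stats(struct spdk_nvme_poll_group *group)
{
	struct spdk_nvme_poll_group_stat *stats;

	if (spdk_nvme_poll_group_get_stats(group, &stats) != 0) {
		return;
	}

	/* stats->transport_stat[i]->tcp holds the counters filled in above. */
	spdk_nvme_poll_group_free_stats(group, stats);
}
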
292608c8ab2bSJacek Kalwas static int
292708c8ab2bSJacek Kalwas nvme_tcp_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
292808c8ab2bSJacek Kalwas 				  struct spdk_memory_domain **domains, int array_size)
292908c8ab2bSJacek Kalwas {
293008c8ab2bSJacek Kalwas 	if (domains && array_size > 0) {
293108c8ab2bSJacek Kalwas 		domains[0] = spdk_memory_domain_get_system_domain();
293208c8ab2bSJacek Kalwas 	}
293308c8ab2bSJacek Kalwas 
293408c8ab2bSJacek Kalwas 	return 1;
293508c8ab2bSJacek Kalwas }
293608c8ab2bSJacek Kalwas 
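/*
 * Hedged usage sketch for the callback above via the public API: TCP always
 * reports exactly one domain, the system memory domain. The helper name is
 * illustrative.
 */
static struct spdk_memory_domain *
example_get_tcp_memory_domain(const struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_memory_domain *domain = NULL;

	if (spdk_nvme_ctrlr_get_memory_domains(ctrlr, &domain, 1) != 1) {
		return NULL;
	}

	return domain;
}
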
2937e4eef697SSeth Howell const struct spdk_nvme_transport_ops tcp_ops = {
2938e4eef697SSeth Howell 	.name = "TCP",
2939e4eef697SSeth Howell 	.type = SPDK_NVME_TRANSPORT_TCP,
2940e4eef697SSeth Howell 	.ctrlr_construct = nvme_tcp_ctrlr_construct,
2941e4eef697SSeth Howell 	.ctrlr_scan = nvme_fabric_ctrlr_scan,
2942e4eef697SSeth Howell 	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
2943e4eef697SSeth Howell 	.ctrlr_enable = nvme_tcp_ctrlr_enable,
2944e4eef697SSeth Howell 
2945e4eef697SSeth Howell 	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
2946e4eef697SSeth Howell 	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
2947e4eef697SSeth Howell 	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
2948e4eef697SSeth Howell 	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
2949214ed8e3SKonrad Sztyber 	.ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
2950214ed8e3SKonrad Sztyber 	.ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
2951214ed8e3SKonrad Sztyber 	.ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
2952214ed8e3SKonrad Sztyber 	.ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,
2953e4eef697SSeth Howell 
2954e4eef697SSeth Howell 	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
2955e4eef697SSeth Howell 	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,
2956e4eef697SSeth Howell 
2957e4eef697SSeth Howell 	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
2958e4eef697SSeth Howell 	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
2959e4eef697SSeth Howell 	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
2960e4eef697SSeth Howell 	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,
2961e4eef697SSeth Howell 
296208c8ab2bSJacek Kalwas 	.ctrlr_get_memory_domains = nvme_tcp_ctrlr_get_memory_domains,
296308c8ab2bSJacek Kalwas 
2964e4eef697SSeth Howell 	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
2965e4eef697SSeth Howell 	.qpair_reset = nvme_tcp_qpair_reset,
2966e4eef697SSeth Howell 	.qpair_submit_request = nvme_tcp_qpair_submit_request,
2967e4eef697SSeth Howell 	.qpair_process_completions = nvme_tcp_qpair_process_completions,
2968f2bd635eSShuhei Matsumoto 	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
29694ddd77b2SKonrad Sztyber 	.qpair_authenticate = nvme_tcp_qpair_authenticate,
2970e4eef697SSeth Howell 	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,
2971c998c6c6SSeth Howell 
2972c998c6c6SSeth Howell 	.poll_group_create = nvme_tcp_poll_group_create,
29735206698eSZiye Yang 	.qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
2974fc86e792SSeth Howell 	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
2975fc86e792SSeth Howell 	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
2976c998c6c6SSeth Howell 	.poll_group_add = nvme_tcp_poll_group_add,
2977c998c6c6SSeth Howell 	.poll_group_remove = nvme_tcp_poll_group_remove,
2978c998c6c6SSeth Howell 	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
2979*1efa1b16SAnkit Kumar 	.poll_group_check_disconnected_qpairs = nvme_tcp_poll_group_check_disconnected_qpairs,
2980c998c6c6SSeth Howell 	.poll_group_destroy = nvme_tcp_poll_group_destroy,
2981ea86c035SAlexey Marchuk 	.poll_group_get_stats = nvme_tcp_poll_group_get_stats,
2982ea86c035SAlexey Marchuk 	.poll_group_free_stats = nvme_tcp_poll_group_free_stats,
2983e4eef697SSeth Howell };
2984e4eef697SSeth Howell 
2985e4eef697SSeth Howell SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);
2986a6b7e183SJim Harris 
29870eae0106SJim Harris static void
29880eae0106SJim Harris nvme_tcp_trace(void)
2989a6b7e183SJim Harris {
2990a6b7e183SJim Harris 	struct spdk_trace_tpoint_opts opts[] = {
2991a6b7e183SJim Harris 		{
2992a6b7e183SJim Harris 			"NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
299326d44a12SJim Harris 			OWNER_TYPE_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
2994e36f0d36SJim Harris 			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
2995e36f0d36SJim Harris 				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
2996a6b7e183SJim Harris 				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
2997a6b7e183SJim Harris 				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
2998a6b7e183SJim Harris 				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
29990127345cSAtul Malakar 				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 },
30000127345cSAtul Malakar 				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
3001a6b7e183SJim Harris 			}
3002a6b7e183SJim Harris 		},
3003a6b7e183SJim Harris 		{
3004a6b7e183SJim Harris 			"NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
300526d44a12SJim Harris 			OWNER_TYPE_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
3006e36f0d36SJim Harris 			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
3007e36f0d36SJim Harris 				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
30080127345cSAtul Malakar 				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 },
30090127345cSAtul Malakar 				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
3010a6b7e183SJim Harris 			}
3011a6b7e183SJim Harris 		},
3012a6b7e183SJim Harris 	};
3013a6b7e183SJim Harris 
3014a6b7e183SJim Harris 	spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
301526d44a12SJim Harris 	spdk_trace_register_owner_type(OWNER_TYPE_NVME_TCP_QP, 'q');
3016a6b7e183SJim Harris 	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
30172bc9d36bSJacek Kalwas 
30182bc9d36bSJacek Kalwas 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_QUEUE, OBJECT_NVME_TCP_REQ, 0);
30192bc9d36bSJacek Kalwas 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_PEND, OBJECT_NVME_TCP_REQ, 0);
30202bc9d36bSJacek Kalwas 	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_COMPLETE, OBJECT_NVME_TCP_REQ, 0);
3021a6b7e183SJim Harris }
30220eae0106SJim Harris SPDK_TRACE_REGISTER_FN(nvme_tcp_trace, "nvme_tcp", TRACE_GROUP_NVME_TCP)
3023