xref: /spdk/lib/nvmf/tcp.c (revision 913f780e1077efada6674c7a14b687ba5ac1b6d0)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/assert.h"
50 #include "spdk_internal/log.h"
51 #include "spdk_internal/nvme_tcp.h"
52 
53 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
54 
55 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
56 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
57 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data pdus for each tqpair */
58 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
59 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
60 
61 /* spdk nvmf related structure */
62 enum spdk_nvmf_tcp_req_state {
63 
64 	/* The request is not currently in use */
65 	TCP_REQUEST_STATE_FREE = 0,
66 
67 	/* Initial state when request first received */
68 	TCP_REQUEST_STATE_NEW,
69 
70 	/* The request is queued until a data buffer is available. */
71 	TCP_REQUEST_STATE_NEED_BUFFER,
72 
73 	/* The request is currently transferring data from the host to the controller. */
74 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
75 
76 	/* The request is ready to execute at the block device */
77 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
78 
79 	/* The request is currently executing at the block device */
80 	TCP_REQUEST_STATE_EXECUTING,
81 
82 	/* The request finished executing at the block device */
83 	TCP_REQUEST_STATE_EXECUTED,
84 
85 	/* The request is ready to send a completion */
86 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
87 
88 	/* The request is currently transferring final pdus from the controller to the host. */
89 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
90 
91 	/* The request completed and can be marked free. */
92 	TCP_REQUEST_STATE_COMPLETED,
93 
94 	/* Terminator */
95 	TCP_REQUEST_NUM_STATES,
96 };
97 
98 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
99 	"Invalid PDU Header Field",
100 	"PDU Sequence Error",
101 	"Header Digest Error",
102 	"Data Transfer Out of Range",
103 	"R2T Limit Exceeded",
104 	"Unsupported parameter",
105 };
106 
107 #define OBJECT_NVMF_TCP_IO				0x80
108 
109 #define TRACE_GROUP_NVMF_TCP				0x5
110 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
111 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
112 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
113 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
114 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
115 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
116 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
117 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
118 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
119 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
120 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
121 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
122 
123 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
124 {
125 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
126 	spdk_trace_register_description("TCP_REQ_NEW",
127 					TRACE_TCP_REQUEST_STATE_NEW,
128 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
129 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
130 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
131 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
132 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
133 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
134 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
135 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
136 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
137 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
138 	spdk_trace_register_description("TCP_REQ_EXECUTING",
139 					TRACE_TCP_REQUEST_STATE_EXECUTING,
140 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
141 	spdk_trace_register_description("TCP_REQ_EXECUTED",
142 					TRACE_TCP_REQUEST_STATE_EXECUTED,
143 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
144 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
145 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
146 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
147 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
148 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
149 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
150 	spdk_trace_register_description("TCP_REQ_COMPLETED",
151 					TRACE_TCP_REQUEST_STATE_COMPLETED,
152 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
153 	spdk_trace_register_description("TCP_WRITE_START",
154 					TRACE_TCP_FLUSH_WRITEBUF_START,
155 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
156 	spdk_trace_register_description("TCP_WRITE_DONE",
157 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
158 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
159 	spdk_trace_register_description("TCP_READ_DONE",
160 					TRACE_TCP_READ_FROM_SOCKET_DONE,
161 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
162 }
163 
164 struct spdk_nvmf_tcp_req  {
165 	struct spdk_nvmf_request		req;
166 	struct spdk_nvme_cpl			rsp;
167 	struct spdk_nvme_cmd			cmd;
168 
169 	/* In-capsule data buffer */
170 	uint8_t					*buf;
171 
172 	bool					has_incapsule_data;
173 
174 	/* transfer_tag */
175 	uint16_t				ttag;
176 
177 	enum spdk_nvmf_tcp_req_state		state;
178 
179 	/*
180 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
181 	 */
182 	uint32_t				next_expected_r2t_offset;
183 	uint32_t				r2tl_remain;
184 
185 	/*
186 	 * c2h_data_offset is used when we send the c2h_data PDU.
187 	 */
188 	uint32_t				c2h_data_offset;
189 	uint32_t				c2h_data_pdu_num;
190 
191 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
192 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
193 };
194 
195 struct nvme_tcp_pdu_recv_buf {
196 	char					*buf;
197 	uint32_t				off;
198 	uint32_t				size;
199 	uint32_t				remain_size;
200 };
201 
202 struct spdk_nvmf_tcp_qpair {
203 	struct spdk_nvmf_qpair			qpair;
204 	struct spdk_nvmf_tcp_poll_group		*group;
205 	struct spdk_nvmf_tcp_port		*port;
206 	struct spdk_sock			*sock;
207 	struct spdk_poller			*flush_poller;
208 
209 	enum nvme_tcp_pdu_recv_state		recv_state;
210 	enum nvme_tcp_qpair_state		state;
211 
212 	struct nvme_tcp_pdu			pdu_in_progress;
213 	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
214 
215 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
216 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
217 
218 	struct nvme_tcp_pdu			*pdu;
219 	struct nvme_tcp_pdu			*pdu_pool;
220 	uint16_t				free_pdu_num;
221 
222 	/* Queues to track the requests in all states */
223 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
224 	/* Number of requests in each state */
225 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
226 
227 	STAILQ_HEAD(, spdk_nvmf_tcp_req)	queued_c2h_data_tcp_req;
228 
229 	uint8_t					cpda;
230 
231 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
232 	 * buffers to be used for in capsule data.
233 	 */
234 	void					*buf;
235 	void					*bufs;
236 	struct spdk_nvmf_tcp_req		*req;
237 	struct spdk_nvmf_tcp_req		*reqs;
238 
239 	bool					host_hdgst_enable;
240 	bool					host_ddgst_enable;
241 
242 
243 	/* The maximum number of I/O outstanding on this connection at one time */
244 	uint16_t				max_queue_depth;
245 
246 
247 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
248 	uint32_t				maxh2cdata;
249 
250 	uint32_t				c2h_data_pdu_cnt;
251 
252 	/* IP address */
253 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
254 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
255 
256 	/* IP port */
257 	uint16_t				initiator_port;
258 	uint16_t				target_port;
259 
260 	/* Timer used to destroy the qpair after a transport error if the initiator does
261 	 *  not close the connection.
262 	 */
263 	struct spdk_poller			*timeout_poller;
264 
265 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
266 };
267 
268 struct spdk_nvmf_tcp_poll_group {
269 	struct spdk_nvmf_transport_poll_group	group;
270 	struct spdk_sock_group			*sock_group;
271 
272 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
273 };
274 
275 struct spdk_nvmf_tcp_port {
276 	struct spdk_nvme_transport_id		trid;
277 	struct spdk_sock			*listen_sock;
278 	uint32_t				ref;
279 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
280 };
281 
282 struct spdk_nvmf_tcp_transport {
283 	struct spdk_nvmf_transport		transport;
284 
285 	pthread_mutex_t				lock;
286 
287 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
288 };
289 
290 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
291 				      struct spdk_nvmf_tcp_req *tcp_req);
292 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
293 
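/* Transition a request to a new state, moving it between the per-state queues and updating the state counters. */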
294 static void
295 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
296 			    enum spdk_nvmf_tcp_req_state state)
297 {
298 	struct spdk_nvmf_qpair *qpair;
299 	struct spdk_nvmf_tcp_qpair *tqpair;
300 
301 	qpair = tcp_req->req.qpair;
302 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
303 
304 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
305 	tqpair->state_cntr[tcp_req->state]--;
306 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
307 
308 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
309 	tqpair->state_cntr[state]++;
310 
311 	tcp_req->state = state;
312 }
313 
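/* Take a PDU from the qpair's free list; the pool is sized so it should never run dry, so abort() if it does. */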
314 static struct nvme_tcp_pdu *
315 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
316 {
317 	struct nvme_tcp_pdu *pdu;
318 
319 	pdu = TAILQ_FIRST(&tqpair->free_queue);
320 	if (!pdu) {
321 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
322 		abort();
323 		return NULL;
324 	}
325 
326 	tqpair->free_pdu_num--;
327 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
328 	memset(pdu, 0, sizeof(*pdu));
329 	pdu->ref = 1;
330 	pdu->hdr = &pdu->hdr_mem;
331 
332 	return pdu;
333 }
334 
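/* Drop a reference on a PDU and return it to the qpair's free list once the last reference is released. */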
335 static void
336 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
337 {
338 	if (!pdu) {
339 		return;
340 	}
341 
342 	assert(pdu->ref > 0);
343 
344 	pdu->ref--;
345 	if (pdu->ref == 0) {
346 		tqpair->free_pdu_num++;
347 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
348 	}
349 }
350 
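/* Take a request from the FREE queue, reset its per-command fields, and move it to the NEW state. */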
351 static struct spdk_nvmf_tcp_req *
352 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
353 {
354 	struct spdk_nvmf_tcp_req *tcp_req;
355 
356 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
357 	if (!tcp_req) {
358 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
359 		return NULL;
360 	}
361 
362 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
363 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
364 	tcp_req->next_expected_r2t_offset = 0;
365 	tcp_req->r2tl_remain = 0;
366 	tcp_req->c2h_data_offset = 0;
367 	tcp_req->has_incapsule_data = false;
368 	memset(&tcp_req->req.dif, 0, sizeof(tcp_req->req.dif));
369 
370 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
371 	return tcp_req;
372 }
373 
374 static void
375 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
376 {
377 	struct spdk_nvmf_tcp_transport *ttransport;
378 
379 	if (!tcp_req) {
380 		return;
381 	}
382 
383 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
384 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
385 				      struct spdk_nvmf_tcp_transport, transport);
386 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
387 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
388 }
389 
390 static int
391 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
392 {
393 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
394 
395 	nvmf_tcp_request_free(tcp_req);
396 
397 	return 0;
398 }
399 
400 static void
401 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
402 				enum spdk_nvmf_tcp_req_state state)
403 {
404 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
405 
406 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
407 		nvmf_tcp_request_free(tcp_req);
408 	}
409 }
410 
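/*
 * Release every PDU still queued for transmission and free the requests in all
 * active states.  Called while tearing down a qpair.
 */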
411 static void
412 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
413 {
414 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
415 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
416 
417 	/* Free the pdus in the send_queue */
418 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
419 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
420 		/* Also check the pdu type, since c2h_data_pdu_cnt must stay accurate for the later consistency check */
421 		if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
422 			assert(tqpair->c2h_data_pdu_cnt > 0);
423 			tqpair->c2h_data_pdu_cnt--;
424 		}
425 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
426 	}
427 
428 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req)) {
429 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
430 	}
431 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
432 
433 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
434 
435 	/* Wipe the requests waiting for buffer from the global list */
436 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
437 			   req_tmp) {
438 		STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
439 			      spdk_nvmf_request, buf_link);
440 	}
441 
442 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
443 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
444 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
445 }
446 
447 static void
448 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
449 {
450 	int i;
451 	struct spdk_nvmf_tcp_req *tcp_req;
452 
453 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
454 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
455 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
456 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
457 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
458 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
459 		}
460 	}
461 }
462 
463 static void
464 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
465 {
466 	int err = 0;
467 
468 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
469 
470 	spdk_poller_unregister(&tqpair->flush_poller);
471 	spdk_sock_close(&tqpair->sock);
472 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
473 
474 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
475 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
476 			    tqpair->free_pdu_num,
477 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
478 		err++;
479 	}
480 
481 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
482 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
483 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
484 			    tqpair->max_queue_depth);
485 		err++;
486 	}
487 
488 	if (tqpair->c2h_data_pdu_cnt != 0) {
489 		SPDK_ERRLOG("tqpair(%p) c2h_data_pdu_cnt is %u but should be 0\n", tqpair,
490 			    tqpair->c2h_data_pdu_cnt);
491 		err++;
492 	}
493 
494 	if (err > 0) {
495 		nvmf_tcp_dump_qpair_req_contents(tqpair);
496 	}
497 	free(tqpair->pdu);
498 	free(tqpair->pdu_pool);
499 	free(tqpair->req);
500 	free(tqpair->reqs);
501 	spdk_free(tqpair->buf);
502 	spdk_free(tqpair->bufs);
503 	free(tqpair->pdu_recv_buf.buf);
504 	free(tqpair);
505 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
506 }
507 
508 static int
509 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
510 {
511 	struct spdk_nvmf_tcp_transport	*ttransport;
512 
513 	assert(transport != NULL);
514 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
515 
516 	pthread_mutex_destroy(&ttransport->lock);
517 	free(ttransport);
518 	return 0;
519 }
520 
521 static struct spdk_nvmf_transport *
522 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
523 {
524 	struct spdk_nvmf_tcp_transport *ttransport;
525 	uint32_t sge_count;
526 	uint32_t min_shared_buffers;
527 
528 	ttransport = calloc(1, sizeof(*ttransport));
529 	if (!ttransport) {
530 		return NULL;
531 	}
532 
533 	TAILQ_INIT(&ttransport->ports);
534 
535 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
536 
537 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
538 
539 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
540 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
541 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
542 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
543 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
544 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
545 		     opts->max_queue_depth,
546 		     opts->max_io_size,
547 		     opts->max_qpairs_per_ctrlr,
548 		     opts->io_unit_size,
549 		     opts->in_capsule_data_size,
550 		     opts->max_aq_depth,
551 		     opts->num_shared_buffers,
552 		     opts->c2h_success,
553 		     opts->dif_insert_or_strip,
554 		     opts->sock_priority);
555 
556 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
557 		SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n"
558 			    "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n",
559 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
560 		free(ttransport);
561 		return NULL;
562 	}
563 
564 	/* I/O unit size cannot be larger than max I/O size */
565 	if (opts->io_unit_size > opts->max_io_size) {
566 		opts->io_unit_size = opts->max_io_size;
567 	}
568 
569 	sge_count = opts->max_io_size / opts->io_unit_size;
570 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
571 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
572 		free(ttransport);
573 		return NULL;
574 	}
575 
576 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
577 	if (min_shared_buffers > opts->num_shared_buffers) {
578 		SPDK_ERRLOG("There are not enough buffers to satisfy "
579 			    "per-poll group caches for each thread. (%" PRIu32 ") "
580 			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
581 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
582 		spdk_nvmf_tcp_destroy(&ttransport->transport);
583 		return NULL;
584 	}
585 
586 	pthread_mutex_init(&ttransport->lock, NULL);
587 
588 	return &ttransport->transport;
589 }
590 
591 static int
592 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
593 {
594 	unsigned long long ull;
595 	char *end = NULL;
596 
597 	ull = strtoull(trsvcid, &end, 10);
598 	if (end == NULL || end == trsvcid || *end != '\0') {
599 		return -1;
600 	}
601 
602 	/* Valid TCP/IP port numbers are in [0, 65535] */
603 	if (ull > 65535) {
604 		return -1;
605 	}
606 
607 	return (int)ull;
608 }
609 
610 /**
611  * Canonicalize a listen address trid.
612  */
613 static int
614 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
615 				 const struct spdk_nvme_transport_id *trid)
616 {
617 	int trsvcid_int;
618 
619 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
620 	if (trsvcid_int < 0) {
621 		return -EINVAL;
622 	}
623 
624 	memset(canon_trid, 0, sizeof(*canon_trid));
625 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
626 	canon_trid->adrfam = trid->adrfam;
627 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
628 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
629 
630 	return 0;
631 }
632 
633 /**
634  * Find an existing listening port.
635  *
636  * Caller must hold ttransport->lock.
637  */
638 static struct spdk_nvmf_tcp_port *
639 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
640 			 const struct spdk_nvme_transport_id *trid)
641 {
642 	struct spdk_nvme_transport_id canon_trid;
643 	struct spdk_nvmf_tcp_port *port;
644 
645 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
646 		return NULL;
647 	}
648 
649 	TAILQ_FOREACH(port, &ttransport->ports, link) {
650 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
651 			return port;
652 		}
653 	}
654 
655 	return NULL;
656 }
657 
658 static int
659 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
660 		     const struct spdk_nvme_transport_id *trid)
661 {
662 	struct spdk_nvmf_tcp_transport *ttransport;
663 	struct spdk_nvmf_tcp_port *port;
664 	int trsvcid_int;
665 	uint8_t adrfam;
666 
667 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
668 
669 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
670 	if (trsvcid_int < 0) {
671 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
672 		return -EINVAL;
673 	}
674 
675 	pthread_mutex_lock(&ttransport->lock);
676 
677 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
678 	if (port) {
679 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
680 			      trid->traddr, trid->trsvcid);
681 		port->ref++;
682 		pthread_mutex_unlock(&ttransport->lock);
683 		return 0;
684 	}
685 
686 	port = calloc(1, sizeof(*port));
687 	if (!port) {
688 		SPDK_ERRLOG("Port allocation failed\n");
689 		free(port);
690 		pthread_mutex_unlock(&ttransport->lock);
691 		return -ENOMEM;
692 	}
693 
694 	port->ref = 1;
695 
696 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
697 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
698 			    trid->traddr, trid->trsvcid);
699 		free(port);
700 		pthread_mutex_unlock(&ttransport->lock);
701 		return -EINVAL;
702 	}
703 
704 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
705 	if (port->listen_sock == NULL) {
706 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
707 			    trid->traddr, trsvcid_int,
708 			    spdk_strerror(errno), errno);
709 		free(port);
710 		pthread_mutex_unlock(&ttransport->lock);
711 		return -errno;
712 	}
713 
714 	if (spdk_sock_is_ipv4(port->listen_sock)) {
715 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
716 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
717 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
718 	} else {
719 		SPDK_ERRLOG("Unhandled socket type\n");
720 		adrfam = 0;
721 	}
722 
723 	if (adrfam != trid->adrfam) {
724 		SPDK_ERRLOG("Socket address family mismatch\n");
725 		spdk_sock_close(&port->listen_sock);
726 		free(port);
727 		pthread_mutex_unlock(&ttransport->lock);
728 		return -EINVAL;
729 	}
730 
731 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
732 		       trid->traddr, trsvcid_int);
733 
734 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
735 	pthread_mutex_unlock(&ttransport->lock);
736 
737 	return 0;
738 }
739 
740 static int
741 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
742 			  const struct spdk_nvme_transport_id *trid)
743 {
744 	struct spdk_nvmf_tcp_transport *ttransport;
745 	struct spdk_nvmf_tcp_port *port;
746 	int rc;
747 
748 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
749 
750 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
751 		      trid->traddr, trid->trsvcid);
752 
753 	pthread_mutex_lock(&ttransport->lock);
754 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
755 	if (port) {
756 		assert(port->ref > 0);
757 		port->ref--;
758 		if (port->ref == 0) {
759 			TAILQ_REMOVE(&ttransport->ports, port, link);
760 			spdk_sock_close(&port->listen_sock);
761 			free(port);
762 		}
763 		rc = 0;
764 	} else {
765 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
766 		rc = -ENOENT;
767 	}
768 	pthread_mutex_unlock(&ttransport->lock);
769 
770 	return rc;
771 }
772 
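/*
 * Build iovecs for the PDUs at the head of the send_queue, write them to the
 * socket, and complete any PDUs that were fully written.  Returns 0 when the
 * send queue is drained, 1 if unsent data remains, and -1 on a socket error.
 */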
773 static int
774 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
775 {
776 	const int array_size = 32;
777 	struct iovec iovs[array_size];
778 	int iovcnt = 0;
779 	int bytes = 0;
780 	int total_length = 0;
781 	uint32_t mapped_length = 0;
782 	struct nvme_tcp_pdu *pdu;
783 	int pdu_length;
784 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
785 
786 	pdu = TAILQ_FIRST(&tqpair->send_queue);
787 
788 	if (pdu == NULL) {
789 		return 0;
790 	}
791 
792 	/*
793 	 * Build up a list of iovecs for the first few PDUs in the
794 	 *  tqpair's send_queue.
795 	 */
796 	while (pdu != NULL && ((array_size - iovcnt) >= (2 + (int)pdu->data_iovcnt))) {
797 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
798 					      array_size - iovcnt,
799 					      pdu,
800 					      tqpair->host_hdgst_enable,
801 					      tqpair->host_ddgst_enable,
802 					      &mapped_length);
803 		total_length += mapped_length;
804 		pdu = TAILQ_NEXT(pdu, tailq);
805 	}
806 
807 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
808 
809 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
810 	if (bytes == -1) {
811 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
812 			return 1;
813 		} else {
814 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
815 				    errno, spdk_strerror(errno));
816 			return -1;
817 		}
818 	}
819 
820 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
821 
822 	pdu = TAILQ_FIRST(&tqpair->send_queue);
823 
824 	/*
825 	 * Free any PDUs that were fully written.  If a PDU was only
826 	 *  partially written, update its writev_offset so that next
827 	 *  time only the unwritten portion will be sent to writev().
828 	 */
829 	TAILQ_INIT(&completed_pdus_list);
830 	while (bytes > 0) {
831 		pdu_length = pdu->hdr->common.plen - pdu->writev_offset;
832 		if (bytes >= pdu_length) {
833 			bytes -= pdu_length;
834 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
835 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
836 			pdu = TAILQ_FIRST(&tqpair->send_queue);
837 
838 		} else {
839 			pdu->writev_offset += bytes;
840 			bytes = 0;
841 		}
842 	}
843 
844 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
845 		pdu = TAILQ_FIRST(&completed_pdus_list);
846 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
847 		assert(pdu->cb_fn != NULL);
848 		pdu->cb_fn(pdu->cb_arg);
849 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
850 	}
851 
852 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
853 }
854 
855 static int
856 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
857 {
858 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
859 	int rc;
860 
861 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
862 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
863 		if (rc == 0 && tqpair->flush_poller != NULL) {
864 			spdk_poller_unregister(&tqpair->flush_poller);
865 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
866 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
867 					       tqpair, 50);
868 		}
869 	} else {
870 		/*
871 		 * If the tqpair state is not RUNNING, then
872 		 * keep trying to flush PDUs until our list is
873 		 * empty - to make sure all data is sent before
874 		 * closing the connection.
875 		 */
876 		do {
877 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
878 		} while (rc == 1);
879 	}
880 
881 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
882 		/*
883 		 * If the poller has already started destruction of the tqpair,
884 		 *  i.e. the socket read failed, then the connection state may already
885 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
886 		 */
887 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
888 	}
889 
890 	return -1;
891 }
892 
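/*
 * Compute the header and data digests if the host enabled them, append the PDU
 * to the send_queue, and try to flush it immediately.
 */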
893 static void
894 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
895 			      struct nvme_tcp_pdu *pdu,
896 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
897 			      void *cb_arg)
898 {
899 	int enable_digest;
900 	int hlen;
901 	uint32_t crc32c;
902 
903 	hlen = pdu->hdr->common.hlen;
904 	enable_digest = 1;
905 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
906 	    pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
907 		/* this PDU should be sent without digest */
908 		enable_digest = 0;
909 	}
910 
911 	/* Header Digest */
912 	if (enable_digest && tqpair->host_hdgst_enable) {
913 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
914 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr->raw + hlen, crc32c);
915 	}
916 
917 	/* Data Digest */
918 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
919 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
920 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
921 	}
922 
923 	pdu->cb_fn = cb_fn;
924 	pdu->cb_arg = cb_arg;
925 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
926 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
927 }
928 
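/*
 * Allocate the per-qpair receive resources: request objects, in-capsule data
 * buffers, the PDU free pool and the PDU receive buffer.  When the queue size
 * is not yet known (sq_head_max == 0) only a single request is prepared;
 * otherwise 'size' requests are allocated.
 */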
929 static int
930 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
931 {
932 	int i;
933 	struct spdk_nvmf_tcp_req *tcp_req;
934 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
935 	uint32_t in_capsule_data_size;
936 
937 	in_capsule_data_size = transport->opts.in_capsule_data_size;
938 	if (transport->opts.dif_insert_or_strip) {
939 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
940 	}
941 
942 	if (!tqpair->qpair.sq_head_max) {
943 		tqpair->req = calloc(1, sizeof(*tqpair->req));
944 		if (!tqpair->req) {
945 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
946 			return -1;
947 		}
948 
949 		if (in_capsule_data_size) {
950 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
951 						   NULL, SPDK_ENV_LCORE_ID_ANY,
952 						   SPDK_MALLOC_DMA);
953 			if (!tqpair->buf) {
954 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
955 				return -1;
956 			}
957 		}
958 
959 		tcp_req = tqpair->req;
960 		tcp_req->ttag = 0;
961 		tcp_req->req.qpair = &tqpair->qpair;
962 
963 		/* Set up memory to receive commands */
964 		if (tqpair->buf) {
965 			tcp_req->buf = tqpair->buf;
966 		}
967 
968 		/* Set the cmd and rsp */
969 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
970 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
971 
972 		/* Initialize request state to FREE */
973 		tcp_req->state = TCP_REQUEST_STATE_FREE;
974 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
975 
976 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
977 		if (!tqpair->pdu) {
978 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
979 			return -1;
980 		}
981 
982 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
983 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
984 		}
985 
986 		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
987 					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
988 		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
989 		if (!tqpair->pdu_recv_buf.buf) {
990 			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
991 				    tqpair->pdu_recv_buf.size);
992 			return -1;
993 		}
994 		tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
995 	} else {
996 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
997 		if (!tqpair->reqs) {
998 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
999 			return -1;
1000 		}
1001 
1002 		if (in_capsule_data_size) {
1003 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
1004 						    NULL, SPDK_ENV_LCORE_ID_ANY,
1005 						    SPDK_MALLOC_DMA);
1006 			if (!tqpair->bufs) {
1007 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1008 				return -1;
1009 			}
1010 		}
1011 
1012 		for (i = 0; i < size; i++) {
1013 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1014 
1015 			tcp_req->ttag = i + 1;
1016 			tcp_req->req.qpair = &tqpair->qpair;
1017 
1018 			/* Set up memory to receive commands */
1019 			if (tqpair->bufs) {
1020 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1021 			}
1022 
1023 			/* Set the cmd and rsp */
1024 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1025 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1026 
1027 			/* Initialize request state to FREE */
1028 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1029 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1030 		}
1031 
1032 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1033 		if (!tqpair->pdu_pool) {
1034 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
1035 			return -1;
1036 		}
1037 
1038 		for (i = 0; i < size; i++) {
1039 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1040 		}
1041 	}
1042 
1043 	return 0;
1044 }
1045 
1046 static int
1047 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1048 {
1049 	struct spdk_nvmf_tcp_qpair *tqpair;
1050 	int i;
1051 
1052 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1053 
1054 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1055 
1056 	TAILQ_INIT(&tqpair->send_queue);
1057 	TAILQ_INIT(&tqpair->free_queue);
1058 	STAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1059 
1060 	/* Initialize the request state queues of the qpair */
1061 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1062 		TAILQ_INIT(&tqpair->state_queue[i]);
1063 	}
1064 
1065 	tqpair->host_hdgst_enable = true;
1066 	tqpair->host_ddgst_enable = true;
1067 	return 0;
1068 }
1069 
1070 static int
1071 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1072 {
1073 
1074 	int rc;
1075 	int buf_size;
1076 
1077 	/* set send buffer size */
1078 	buf_size = 2 * 1024 * 1024;
1079 	rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size);
1080 	if (rc != 0) {
1081 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
1082 		return rc;
1083 	}
1084 
1085 	/* set low water mark */
1086 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1087 	if (rc != 0) {
1088 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1089 		return rc;
1090 	}
1091 
1092 	return 0;
1093 }
1094 
1095 static void
1096 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1097 			      struct spdk_nvmf_tcp_port *port,
1098 			      struct spdk_sock *sock,
1099 			      new_qpair_fn cb_fn, void *cb_arg)
1100 {
1101 	struct spdk_nvmf_tcp_qpair *tqpair;
1102 	int rc;
1103 
1104 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1105 		      port->trid.traddr, port->trid.trsvcid);
1106 
1107 	if (transport->opts.sock_priority) {
1108 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1109 		if (rc) {
1110 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1111 			spdk_sock_close(&sock);
1112 			return;
1113 		}
1114 	}
1115 
1116 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1117 	if (tqpair == NULL) {
1118 		SPDK_ERRLOG("Could not allocate new connection.\n");
1119 		spdk_sock_close(&sock);
1120 		return;
1121 	}
1122 
1123 	tqpair->sock = sock;
1124 	tqpair->max_queue_depth = 1;
1125 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1126 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1127 	tqpair->port = port;
1128 	tqpair->qpair.transport = transport;
1129 
1130 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1131 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1132 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1133 			       &tqpair->initiator_port);
1134 	if (rc < 0) {
1135 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1136 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1137 		return;
1138 	}
1139 
1140 	cb_fn(&tqpair->qpair, cb_arg);
1141 }
1142 
1143 static void
1144 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1145 			  new_qpair_fn cb_fn, void *cb_arg)
1146 {
1147 	struct spdk_sock *sock;
1148 	int i;
1149 
1150 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1151 		sock = spdk_sock_accept(port->listen_sock);
1152 		if (sock) {
1153 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn, cb_arg);
1154 		}
1155 	}
1156 }
1157 
1158 static void
1159 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn, void *cb_arg)
1160 {
1161 	struct spdk_nvmf_tcp_transport *ttransport;
1162 	struct spdk_nvmf_tcp_port *port;
1163 
1164 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1165 
1166 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1167 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn, cb_arg);
1168 	}
1169 }
1170 
1171 static void
1172 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1173 		       struct spdk_nvme_transport_id *trid,
1174 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1175 {
1176 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1177 	entry->adrfam = trid->adrfam;
1178 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
1179 
1180 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1181 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1182 
1183 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1184 }
1185 
1186 static struct spdk_nvmf_transport_poll_group *
1187 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1188 {
1189 	struct spdk_nvmf_tcp_poll_group *tgroup;
1190 
1191 	tgroup = calloc(1, sizeof(*tgroup));
1192 	if (!tgroup) {
1193 		return NULL;
1194 	}
1195 
1196 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1197 	if (!tgroup->sock_group) {
1198 		goto cleanup;
1199 	}
1200 
1201 	TAILQ_INIT(&tgroup->qpairs);
1202 
1203 	return &tgroup->group;
1204 
1205 cleanup:
1206 	free(tgroup);
1207 	return NULL;
1208 }
1209 
1210 static struct spdk_nvmf_transport_poll_group *
1211 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1212 {
1213 	struct spdk_nvmf_tcp_qpair *tqpair;
1214 	struct spdk_sock_group *group = NULL;
1215 	int rc;
1216 
1217 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1218 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1219 	if (!rc && group != NULL) {
1220 		return spdk_sock_group_get_ctx(group);
1221 	}
1222 
1223 	return NULL;
1224 }
1225 
1226 static void
1227 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1228 {
1229 	struct spdk_nvmf_tcp_poll_group *tgroup;
1230 
1231 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1232 	spdk_sock_group_close(&tgroup->sock_group);
1233 
1234 	free(tgroup);
1235 }
1236 
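/*
 * Reposition pdu_in_progress.hdr within the shared receive buffer, compacting
 * any unread bytes to the front when the next header would not fit at the
 * current offset.
 */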
1237 static inline void
1238 spdk_nvmf_tcp_reset_pdu_in_process(struct spdk_nvmf_tcp_qpair *tqpair)
1239 {
1240 	struct nvme_tcp_pdu_recv_buf *pdu_recv_buf = &tqpair->pdu_recv_buf;
1241 	char *dst, *src;
1242 
1243 	if (spdk_unlikely((pdu_recv_buf->off + sizeof(union nvme_tcp_pdu_hdr)) >
1244 			  pdu_recv_buf->size)) {
1245 		if (pdu_recv_buf->remain_size) {
1246 			dst = pdu_recv_buf->buf;
1247 			src = (char *)((void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1248 
1249 			/* Use memmove instead of memcpy because the source and destination may overlap */
1250 			memmove(dst, src, pdu_recv_buf->remain_size);
1251 		}
1252 		tqpair->pdu_recv_buf.off = 0;
1253 	} else if (!pdu_recv_buf->remain_size) {
1254 		tqpair->pdu_recv_buf.off = 0;
1255 	}
1256 
1257 	tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)((void *)pdu_recv_buf->buf +
1258 				      pdu_recv_buf->off);
1259 }
1260 
1261 static void
1262 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1263 				   enum nvme_tcp_pdu_recv_state state)
1264 {
1265 	if (tqpair->recv_state == state) {
1266 		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state (%d) to be set\n",
1267 			    tqpair, state);
1268 		return;
1269 	}
1270 
1271 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1272 	tqpair->recv_state = state;
1273 
1274 	switch (state) {
1275 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1276 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1277 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1278 		break;
1279 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1280 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1281 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1282 		spdk_nvmf_tcp_reset_pdu_in_process(tqpair);
1283 		break;
1284 	default:
1285 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1286 		abort();
1287 		break;
1288 	}
1289 }
1290 
1291 static int
1292 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1293 {
1294 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1295 
1296 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1297 
1298 	SPDK_ERRLOG("No PDU received on tqpair=%p within %d seconds\n", tqpair,
1299 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1300 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
1301 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
1302 	spdk_poller_unregister(&tqpair->timeout_poller);
1303 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
1304 
1305 	return 0;
1306 }
1307 
1308 static void
1309 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1310 {
1311 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1312 
1313 	if (!tqpair->timeout_poller) {
1314 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1315 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1316 	}
1317 }
1318 
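/*
 * Queue a C2H termination request PDU that echoes the header of the offending
 * PDU, and move the qpair into the error receive state.
 */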
1319 static void
1320 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1321 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1322 {
1323 	struct nvme_tcp_pdu *rsp_pdu;
1324 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1325 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1326 	uint32_t copy_len;
1327 
1328 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1329 	if (!rsp_pdu) {
1330 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1331 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1332 		return;
1333 	}
1334 
1335 	c2h_term_req = &rsp_pdu->hdr->term_req;
1336 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1337 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1338 
1339 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1340 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1341 		DSET32(&c2h_term_req->fei, error_offset);
1342 	}
1343 
1344 	copy_len = pdu->hdr->common.hlen;
1345 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1346 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1347 	}
1348 
1349 	/* Copy the error info into the buffer */
1350 	memcpy((uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, pdu->hdr->raw, copy_len);
1351 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, copy_len);
1352 
1353 	/* The term req pdu carries the header of the offending pdu */
1354 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1355 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1356 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1357 }
1358 
1359 static void
1360 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1361 				     struct spdk_nvmf_tcp_qpair *tqpair,
1362 				     struct nvme_tcp_pdu *pdu)
1363 {
1364 	struct spdk_nvmf_tcp_req *tcp_req;
1365 
1366 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1367 	if (!tcp_req) {
1368 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1369 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1370 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1371 		return;
1372 	}
1373 
1374 	pdu->ctx = tcp_req;
1375 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1376 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1377 	return;
1378 }
1379 
1380 static void
1381 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1382 		struct spdk_nvmf_tcp_qpair *tqpair,
1383 		struct nvme_tcp_pdu *pdu)
1384 {
1385 	struct spdk_nvmf_tcp_req *tcp_req;
1386 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1387 	uint32_t error_offset = 0;
1388 	enum spdk_nvme_tcp_term_req_fes fes;
1389 
1390 	capsule_cmd = &pdu->hdr->capsule_cmd;
1391 	tcp_req = pdu->ctx;
1392 	assert(tcp_req != NULL);
1393 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1394 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1395 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1396 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1397 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1398 		goto err;
1399 	}
1400 
1401 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1402 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1403 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1404 
1405 	return;
1406 err:
1407 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1408 }
1409 
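/*
 * Validate an incoming H2C data PDU header: locate the matching outstanding
 * request by cccid/ttag and check the data offset and length before accepting
 * the payload.
 */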
1410 static void
1411 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1412 				  struct spdk_nvmf_tcp_qpair *tqpair,
1413 				  struct nvme_tcp_pdu *pdu)
1414 {
1415 	struct spdk_nvmf_tcp_req *tcp_req;
1416 	uint32_t error_offset = 0;
1417 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1418 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1419 	bool ttag_offset_error = false;
1420 
1421 	h2c_data = &pdu->hdr->h2c_data;
1422 
1423 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1424 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1425 
1426 	/* Use the information in the pdu to find the matching request */
1427 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1428 		      state_link) {
1429 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1430 			break;
1431 		}
1432 
1433 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1434 			ttag_offset_error = true;
1435 		}
1436 	}
1437 
1438 	if (!tcp_req) {
1439 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1440 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1441 		if (!ttag_offset_error) {
1442 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1443 		} else {
1444 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1445 		}
1446 		goto err;
1447 	}
1448 
1449 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1450 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1451 			      "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset=%u\n",
1452 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1453 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1454 		goto err;
1455 	}
1456 
1457 	if (h2c_data->datal > tqpair->maxh2cdata) {
1458 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datal=%u exceeds maxh2cdata size=%u\n",
1459 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1460 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1461 		goto err;
1462 	}
1463 
1464 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1465 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1466 			      "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1467 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1468 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1469 		goto err;
1470 	}
1471 
1472 	pdu->ctx = tcp_req;
1473 
1474 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1475 		pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
1476 	}
1477 
1478 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1479 				  h2c_data->datao, h2c_data->datal);
1480 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1481 	return;
1482 
1483 err:
1484 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1485 }
1486 
1487 static void
1488 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1489 {
1490 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1491 	nvmf_tcp_request_free(tcp_req);
1492 }
1493 
1494 static void
1495 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1496 				    struct spdk_nvmf_tcp_qpair *tqpair)
1497 {
1498 	struct nvme_tcp_pdu *rsp_pdu;
1499 	struct spdk_nvme_tcp_rsp *capsule_resp;
1500 
1501 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1502 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1503 	if (!rsp_pdu) {
1504 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1505 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1506 		return;
1507 	}
1508 
1509 	capsule_resp = &rsp_pdu->hdr->capsule_resp;
1510 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1511 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1512 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1513 	if (tqpair->host_hdgst_enable) {
1514 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1515 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1516 	}
1517 
1518 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1519 }
1520 
1521 static void
1522 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1523 {
1524 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1525 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1526 					     struct spdk_nvmf_tcp_qpair, qpair);
1527 
1528 	assert(tqpair != NULL);
1529 	assert(tcp_req->c2h_data_pdu_num > 0);
1530 	tcp_req->c2h_data_pdu_num--;
1531 	if (!tcp_req->c2h_data_pdu_num) {
1532 		if (tqpair->qpair.transport->opts.c2h_success) {
1533 			nvmf_tcp_request_free(tcp_req);
1534 		} else {
1535 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1536 		}
1537 	}
1538 
1539 	tqpair->c2h_data_pdu_cnt--;
1540 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1541 }
1542 
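/* Send an R2T PDU asking the host for the next chunk of data, limited to maxh2cdata bytes. */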
1543 static void
1544 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1545 			   struct spdk_nvmf_tcp_req *tcp_req)
1546 {
1547 	struct nvme_tcp_pdu *rsp_pdu;
1548 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1549 
1550 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1551 	if (!rsp_pdu) {
1552 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1553 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1554 		return;
1555 	}
1556 
1557 	r2t = &rsp_pdu->hdr->r2t;
1558 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1559 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1560 
1561 	if (tqpair->host_hdgst_enable) {
1562 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1563 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1564 	}
1565 
1566 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1567 	r2t->ttag = tcp_req->ttag;
1568 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1569 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1570 	tcp_req->r2tl_remain = r2t->r2tl;
1571 
1572 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1573 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1574 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1575 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1576 }
1577 
1578 static void
1579 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1580 				      struct spdk_nvmf_tcp_qpair *tqpair,
1581 				      struct nvme_tcp_pdu *pdu)
1582 {
1583 	struct spdk_nvmf_tcp_req *tcp_req;
1584 
1585 	tcp_req = pdu->ctx;
1586 	assert(tcp_req != NULL);
1587 
1588 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1589 
1590 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1591 	tcp_req->r2tl_remain -= pdu->data_len;
1592 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1593 
1594 	if (!tcp_req->r2tl_remain) {
1595 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1596 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1597 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1598 		} else {
1599 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1600 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1601 		}
1602 	}
1603 }
1604 
1605 static void
1606 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1607 {
1608 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1609 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1610 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1611 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1612 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1613 			      DGET32(h2c_term_req->fei));
1614 	}
1615 }
1616 
1617 static void
1618 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1619 				      struct nvme_tcp_pdu *pdu)
1620 {
1621 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1622 	uint32_t error_offset = 0;
1623 	enum spdk_nvme_tcp_term_req_fes fes;
1624 
1625 
1626 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1627 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1628 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1629 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1630 		goto end;
1631 	}
1632 
1633 	/* set the data buffer */
1634 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr->raw + h2c_term_req->common.hlen,
1635 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1636 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1637 	return;
1638 end:
1639 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1640 	return;
1641 }
1642 
1643 static void
1644 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1645 		struct nvme_tcp_pdu *pdu)
1646 {
1647 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1648 
1649 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1650 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1651 	return;
1652 }
1653 
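/*
 * Called once the complete payload of the in-progress PDU has been read.
 * Verify the data digest when enabled, then dispatch on PDU type to the
 * capsule command, H2C data, or H2C termination request payload handlers.
 */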
1654 static void
1655 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1656 {
1657 	int rc = 0;
1658 	struct nvme_tcp_pdu *pdu;
1659 	uint32_t crc32c, error_offset = 0;
1660 	enum spdk_nvme_tcp_term_req_fes fes;
1661 	struct spdk_nvmf_tcp_transport *ttransport;
1662 
1663 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1664 	pdu = &tqpair->pdu_in_progress;
1665 
1666 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1667 	/* check data digest if needed */
1668 	if (pdu->ddgst_enable) {
1669 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1670 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1671 		if (rc == 0) {
1672 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1673 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1674 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1675 			return;
1676 
1677 		}
1678 	}
1679 
1680 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1681 	switch (pdu->hdr->common.pdu_type) {
1682 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1683 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1684 		break;
1685 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1686 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1687 		break;
1688 
1689 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1690 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1691 		break;
1692 
1693 	default:
1694 		/* The code should never reach this point */
1695 		SPDK_ERRLOG("The code should not reach here\n");
1696 		break;
1697 	}
1698 }
1699 
1700 static void
1701 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1702 {
1703 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1704 
1705 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1706 }
1707 
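/*
 * Handle the ICReq PDU that starts connection setup: validate the PFV,
 * record the host's header/data digest settings and data alignment, then
 * build and queue an ICResp carrying our maxh2cdata (bounded by the
 * configured io_unit_size).
 */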
1708 static void
1709 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1710 			   struct spdk_nvmf_tcp_qpair *tqpair,
1711 			   struct nvme_tcp_pdu *pdu)
1712 {
1713 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr->ic_req;
1714 	struct nvme_tcp_pdu *rsp_pdu;
1715 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1716 	uint32_t error_offset = 0;
1717 	enum spdk_nvme_tcp_term_req_fes fes;
1718 
1719 	/* Only PFV 0 is defined currently */
1720 	if (ic_req->pfv != 0) {
1721 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1722 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1723 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1724 		goto end;
1725 	}
1726 
1727 	/* MAXR2T is 0's based */
1728 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1729 
1730 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1731 	if (!tqpair->host_hdgst_enable) {
1732 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1733 	}
1734 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1735 	if (!tqpair->host_ddgst_enable) {
1736 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1737 	}
1738 
1739 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1740 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1741 
1742 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1743 	if (!rsp_pdu) {
1744 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1745 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1746 		return;
1747 	}
1748 
1749 	ic_resp = &rsp_pdu->hdr->ic_resp;
1750 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1751 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1752 	ic_resp->pfv = 0;
1753 	ic_resp->cpda = tqpair->cpda;
1754 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1755 				      ttransport->transport.opts.io_unit_size);
1756 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1757 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1758 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1759 
1760 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1761 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1762 
1763 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1764 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1765 	return;
1766 end:
1767 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1768 	return;
1769 }
1770 
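/*
 * Called after the PDU-specific header has been read. Verify the header
 * digest if present, then dispatch on PDU type; unexpected types are
 * rejected with a C2H termination request.
 */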
1771 static void
1772 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1773 {
1774 	struct nvme_tcp_pdu *pdu;
1775 	int rc;
1776 	uint32_t crc32c, error_offset = 0;
1777 	enum spdk_nvme_tcp_term_req_fes fes;
1778 	struct spdk_nvmf_tcp_transport *ttransport;
1779 
1780 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1781 	pdu = &tqpair->pdu_in_progress;
1782 
1783 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1784 		      pdu->hdr->common.pdu_type);
1785 	/* check header digest if needed */
1786 	if (pdu->has_hdgst) {
1787 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1788 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1789 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr->raw + pdu->hdr->common.hlen, crc32c);
1790 		if (rc == 0) {
1791 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1792 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1793 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1794 			return;
1795 
1796 		}
1797 	}
1798 
1799 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1800 	switch (pdu->hdr->common.pdu_type) {
1801 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1802 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1803 		break;
1804 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1805 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1806 		break;
1807 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1808 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1809 		break;
1810 
1811 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1812 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1813 		break;
1814 
1815 	default:
1816 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr->common.pdu_type);
1817 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1818 		error_offset = 1;
1819 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1820 		break;
1821 	}
1822 }
1823 
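/*
 * Validate the common header of the in-progress PDU: PDU type against the
 * qpair state, header length, PDU data offset (pdo) and plen. When
 * tqpair->cpda is non-zero, the data offset must equal (cpda + 1) * 4 bytes,
 * e.g. cpda = 3 requires pdo = 16. On success the expected PSH length is
 * computed and the receive state advances; on failure a C2H termination
 * request is sent.
 */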
1824 static void
1825 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1826 {
1827 	struct nvme_tcp_pdu *pdu;
1828 	uint32_t error_offset = 0;
1829 	enum spdk_nvme_tcp_term_req_fes fes;
1830 	uint8_t expected_hlen, pdo;
1831 	bool plen_error = false, pdo_error = false;
1832 
1833 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1834 	pdu = &tqpair->pdu_in_progress;
1835 
1836 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1837 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1838 			SPDK_ERRLOG("Already received ICreq PDU, rejecting this pdu=%p\n", pdu);
1839 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1840 			goto err;
1841 		}
1842 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1843 		if (pdu->hdr->common.plen != expected_hlen) {
1844 			plen_error = true;
1845 		}
1846 	} else {
1847 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1848 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
1849 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1850 			goto err;
1851 		}
1852 
1853 		switch (pdu->hdr->common.pdu_type) {
1854 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1855 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1856 			pdo = pdu->hdr->common.pdo;
1857 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1858 				pdo_error = true;
1859 				break;
1860 			}
1861 
1862 			if (pdu->hdr->common.plen < expected_hlen) {
1863 				plen_error = true;
1864 			}
1865 			break;
1866 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1867 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1868 			pdo = pdu->hdr->common.pdo;
1869 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1870 				pdo_error = true;
1871 				break;
1872 			}
1873 			if (pdu->hdr->common.plen < expected_hlen) {
1874 				plen_error = true;
1875 			}
1876 			break;
1877 
1878 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1879 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1880 			if ((pdu->hdr->common.plen <= expected_hlen) ||
1881 			    (pdu->hdr->common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1882 				plen_error = true;
1883 			}
1884 			break;
1885 
1886 		default:
1887 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr->common.pdu_type);
1888 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1889 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1890 			goto err;
1891 		}
1892 	}
1893 
1894 	if (pdu->hdr->common.hlen != expected_hlen) {
1895 		SPDK_ERRLOG("PDU type=0x%02x, Expected header length %u, got %u on tqpair=%p\n",
1896 			    pdu->hdr->common.pdu_type,
1897 			    expected_hlen, pdu->hdr->common.hlen, tqpair);
1898 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1899 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1900 		goto err;
1901 	} else if (pdo_error) {
1902 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1903 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1904 	} else if (plen_error) {
1905 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1906 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1907 		goto err;
1908 	} else {
1909 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1910 		nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
1911 		return;
1912 	}
1913 err:
1914 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1915 }
1916 
1917 static int
1918 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1919 				int read_len)
1920 {
1921 	int rc;
1922 
1923 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1924 				      read_offset, read_len, pdu->dif_ctx);
1925 	if (rc != 0) {
1926 		SPDK_ERRLOG("DIF generate failed\n");
1927 	}
1928 
1929 	return rc;
1930 }
1931 
1932 static int
1933 nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
1934 {
1935 	int rc;
1936 
1937 	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size - pdu_recv_buf->off,
1938 				(void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1939 	if (rc < 0) {
1940 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
1941 	} else if (rc > 0) {
1942 		pdu_recv_buf->remain_size = rc;
1943 		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1944 	}
1945 
1946 	return rc;
1947 }
1948 
1949 static uint32_t
1950 nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1951 				     uint32_t expected_size,
1952 				     char *dst)
1953 {
1954 	uint32_t size;
1955 
1956 	assert(pdu_recv_buf->remain_size > 0);
1957 	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
1958 	if (dst) {
1959 		memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
1960 	}
1961 	pdu_recv_buf->off += size;
1962 	pdu_recv_buf->remain_size -= size;
1963 
1964 
1965 	return size;
1966 }
1967 
1968 static int
1969 nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1970 		struct nvme_tcp_pdu *pdu)
1971 {
1972 	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
1973 	int iovcnt, i;
1974 	uint32_t size = 0;
1975 	void *dst;
1976 
1977 	assert(pdu_recv_buf->remain_size > 0);
1978 	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
1979 					     pdu->ddgst_enable, NULL);
1980 	assert(iovcnt >= 0);
1981 	for (i = 0; i < iovcnt; i++) {
1982 		if (!pdu_recv_buf->remain_size) {
1983 			break;
1984 		}
1985 
1986 		dst = NULL;
1987 		if (pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) {
1988 			dst = iov[i].iov_base;
1989 		}
1990 		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, dst);
1991 	}
1992 
1993 	return size;
1994 }
1995 
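/*
 * Receive-side state machine for a qpair. Reads the common header, the
 * PDU-specific header and the payload from the socket (through the PDU
 * receive buffer), looping as long as back-to-back state changes occur.
 */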
1996 static int
1997 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
1998 {
1999 	int rc = 0;
2000 	struct nvme_tcp_pdu *pdu;
2001 	enum nvme_tcp_pdu_recv_state prev_state;
2002 	uint32_t data_len;
2003 
2004 	/* The loop here is to allow for several back-to-back state changes. */
2005 	do {
2006 		prev_state = tqpair->recv_state;
2007 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
2008 
2009 		pdu = &tqpair->pdu_in_progress;
2010 		switch (tqpair->recv_state) {
2011 		/* Wait for the common header  */
2012 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
2013 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
2014 			if (!tqpair->pdu_recv_buf.remain_size) {
2015 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2016 				if (rc <= 0) {
2017 					return rc;
2018 				}
2019 			}
2020 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2021 					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
2022 					NULL);
2023 			pdu->ch_valid_bytes += rc;
2024 			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
2025 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2026 			}
2027 
2028 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
2029 				return NVME_TCP_PDU_IN_PROGRESS;
2030 			}
2031 
2032 			/* The common header of this PDU has now been read from the socket. */
2033 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
2034 			break;
2035 		/* Wait for the pdu specific header  */
2036 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2037 			if (!tqpair->pdu_recv_buf.remain_size) {
2038 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2039 				if (rc <= 0) {
2040 					return rc;
2041 				}
2042 			}
2043 
2044 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2045 					pdu->psh_len - pdu->psh_valid_bytes,
2046 					NULL);
2047 			pdu->psh_valid_bytes += rc;
2048 			if (pdu->psh_valid_bytes < pdu->psh_len) {
2049 				return NVME_TCP_PDU_IN_PROGRESS;
2050 			}
2051 
2052 			/* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
2053 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
2054 			break;
2055 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2056 			/* If there is no payload to read, just return and wait for more data */
2057 			if (!pdu->data_len) {
2058 				return NVME_TCP_PDU_IN_PROGRESS;
2059 			}
2060 
2061 			data_len = pdu->data_len;
2062 			/* data digest */
2063 			if (spdk_unlikely((pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2064 					  tqpair->host_ddgst_enable)) {
2065 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
2066 				pdu->ddgst_enable = true;
2067 			}
2068 
2069 			if (tqpair->pdu_recv_buf.remain_size) {
2070 				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
2071 				pdu->readv_offset += rc;
2072 			}
2073 
2074 			if (pdu->readv_offset < data_len) {
2075 				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2076 				if (rc < 0) {
2077 					return NVME_TCP_PDU_IN_PROGRESS;
2078 				}
2079 				pdu->readv_offset += rc;
2080 			}
2081 
2082 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
2083 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
2084 				if (rc != 0) {
2085 					return NVME_TCP_PDU_FATAL;
2086 				}
2087 			}
2088 
2089 			if (pdu->readv_offset < data_len) {
2090 				return NVME_TCP_PDU_IN_PROGRESS;
2091 			}
2092 
2093 			/* All of this PDU has now been read from the socket. */
2094 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2095 			break;
2096 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2097 			/* Check whether the connection is closed. Only read 1 byte at a time */
2098 			rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr->common);
2099 			if (rc < 0) {
2100 				return NVME_TCP_PDU_FATAL;
2101 			}
2102 			break;
2103 		default:
2104 			assert(0);
2105 			SPDK_ERRLOG("The code should not reach here\n");
2106 			break;
2107 		}
2108 	} while (tqpair->recv_state != prev_state);
2109 
2110 	return rc;
2111 }
2112 
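/*
 * Determine the data transfer direction of a request from its opcode (or
 * fabrics command type), treating most admin Get/Set Features commands and
 * zero-length SGLs as carrying no data.
 */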
2113 static enum spdk_nvme_data_transfer
2114 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2115 	enum spdk_nvme_data_transfer xfer;
2116 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2117 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2118 
2119 	/* Figure out data transfer direction */
2120 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2121 	{
2122 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2123 	} else
2124 	{
2125 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2126 
2127 		/* Some admin commands are special cases */
2128 		if ((tcp_req->req.qpair->qid == 0) &&
2129 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2130 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2131 			switch (cmd->cdw10 & 0xff) {
2132 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2133 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2134 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2135 				break;
2136 			default:
2137 				xfer = SPDK_NVME_DATA_NONE;
2138 			}
2139 		}
2140 	}
2141 
2142 	if (xfer == SPDK_NVME_DATA_NONE)
2143 	{
2144 		return xfer;
2145 	}
2146 
2147 	/* Even for commands that may transfer data, they could have specified 0 length.
2148 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2149 	 */
2150 	switch (sgl->generic.type)
2151 	{
2152 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2153 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2154 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2155 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2156 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2157 		if (sgl->unkeyed.length == 0) {
2158 			xfer = SPDK_NVME_DATA_NONE;
2159 		}
2160 		break;
2161 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2162 		if (sgl->keyed.length == 0) {
2163 			xfer = SPDK_NVME_DATA_NONE;
2164 		}
2165 		break;
2166 	}
2167 
2168 	return xfer;
2169 }
2170 
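/*
 * Parse the command's SGL descriptor. Transport data block SGLs are backed
 * by buffers taken from the shared pool (the request stays queued if no
 * buffers are available); data block SGLs with the offset subtype point
 * into the in-capsule buffer after bounds checking. DIF-enabled requests
 * have their length extended to cover the metadata.
 */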
2171 static int
2172 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
2173 			    struct spdk_nvmf_transport *transport,
2174 			    struct spdk_nvmf_transport_poll_group *group)
2175 {
2176 	struct spdk_nvmf_request		*req = &tcp_req->req;
2177 	struct spdk_nvme_cmd			*cmd;
2178 	struct spdk_nvme_cpl			*rsp;
2179 	struct spdk_nvme_sgl_descriptor		*sgl;
2180 	uint32_t				length;
2181 
2182 	cmd = &req->cmd->nvme_cmd;
2183 	rsp = &req->rsp->nvme_cpl;
2184 	sgl = &cmd->dptr.sgl1;
2185 
2186 	length = sgl->unkeyed.length;
2187 
2188 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2189 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2190 		if (length > transport->opts.max_io_size) {
2191 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2192 				    length, transport->opts.max_io_size);
2193 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2194 			return -1;
2195 		}
2196 
2197 		/* fill request length and populate iovs */
2198 		req->length = length;
2199 
2200 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2201 
2202 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2203 			req->dif.orig_length = length;
2204 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2205 			req->dif.elba_length = length;
2206 		}
2207 
2208 		if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
2209 			/* No available buffers. Queue this request up. */
2210 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2211 				      tcp_req);
2212 			return 0;
2213 		}
2214 
2215 		/* backward compatible */
2216 		req->data = req->iov[0].iov_base;
2217 
2218 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2219 			      tcp_req, req->iovcnt, req->data);
2220 
2221 		return 0;
2222 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2223 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2224 		uint64_t offset = sgl->address;
2225 		uint32_t max_len = transport->opts.in_capsule_data_size;
2226 
2227 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2228 			      offset, length);
2229 
2230 		if (offset > max_len) {
2231 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2232 				    offset, max_len);
2233 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2234 			return -1;
2235 		}
2236 		max_len -= (uint32_t)offset;
2237 
2238 		if (length > max_len) {
2239 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2240 				    length, max_len);
2241 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2242 			return -1;
2243 		}
2244 
2245 		req->data = tcp_req->buf + offset;
2246 		req->data_from_pool = false;
2247 		req->length = length;
2248 
2249 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2250 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2251 			req->dif.elba_length = length;
2252 		}
2253 
2254 		req->iov[0].iov_base = req->data;
2255 		req->iov[0].iov_len = length;
2256 		req->iovcnt = 1;
2257 
2258 		return 0;
2259 	}
2260 
2261 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2262 		    sgl->generic.type, sgl->generic.subtype);
2263 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2264 	return -1;
2265 }
2266 
2267 static inline enum spdk_nvme_media_error_status_code
2268 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
2269 	enum spdk_nvme_media_error_status_code result;
2270 
2271 	switch (err_type)
2272 	{
2273 	case SPDK_DIF_REFTAG_ERROR:
2274 		result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
2275 		break;
2276 	case SPDK_DIF_APPTAG_ERROR:
2277 		result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
2278 		break;
2279 	case SPDK_DIF_GUARD_ERROR:
2280 		result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
2281 		break;
2282 	default:
2283 		SPDK_UNREACHABLE();
2284 		break;
2285 	}
2286 
2287 	return result;
2288 }
2289 
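/*
 * Build and send one C2H Data PDU for the request at the head of the queued
 * C2H list: fill in cccid/datao/datal, pad the data offset to the required
 * alignment, enable digests as negotiated, attach the data iovecs (verifying
 * DIF when enabled), and set the LAST_PDU/SUCCESS flags on the final PDU of
 * the transfer.
 */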
2290 static void
2291 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2292 			    struct spdk_nvmf_tcp_req *tcp_req)
2293 {
2294 	struct nvme_tcp_pdu *rsp_pdu;
2295 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2296 	uint32_t plen, pdo, alignment;
2297 	int rc;
2298 
2299 	assert(tcp_req == STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req));
2300 
2301 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2302 
2303 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2304 	assert(rsp_pdu != NULL);
2305 
2306 	c2h_data = &rsp_pdu->hdr->c2h_data;
2307 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2308 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2309 
2310 	if (tqpair->host_hdgst_enable) {
2311 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2312 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2313 	}
2314 
2315 	/* set the psh */
2316 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2317 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2318 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2319 	c2h_data->datao = tcp_req->c2h_data_offset;
2320 
2321 	/* set the padding */
2322 	rsp_pdu->padding_len = 0;
2323 	pdo = plen;
2324 	if (tqpair->cpda) {
2325 		alignment = (tqpair->cpda + 1) << 2;
2326 		if (alignment > plen) {
2327 			rsp_pdu->padding_len = alignment - plen;
2328 			pdo = plen = alignment;
2329 		}
2330 	}
2331 
2332 	c2h_data->common.pdo = pdo;
2333 	plen += c2h_data->datal;
2334 	if (tqpair->host_ddgst_enable) {
2335 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2336 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2337 	}
2338 
2339 	c2h_data->common.plen = plen;
2340 
2341 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2342 		rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
2343 	}
2344 
2345 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2346 				  c2h_data->datao, c2h_data->datal);
2347 
2348 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2349 		struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
2350 		struct spdk_dif_error err_blk = {};
2351 
2352 		rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
2353 					    0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
2354 		if (rc != 0) {
2355 			SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2356 				    err_blk.err_type, err_blk.err_offset);
2357 			rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
2358 			rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
2359 			STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2360 			spdk_nvmf_tcp_pdu_put(tqpair, rsp_pdu);
2361 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2362 			return;
2363 		}
2364 	}
2365 
2366 	tcp_req->c2h_data_offset += c2h_data->datal;
2367 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2368 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2369 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2370 		if (tqpair->qpair.transport->opts.c2h_success) {
2371 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2372 		}
2373 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2374 	}
2375 
2376 	tqpair->c2h_data_pdu_cnt += 1;
2377 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2378 }
2379 
2380 static int
2381 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2382 {
2383 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2384 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2385 }
2386 
2387 static void
2388 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2389 {
2390 	struct spdk_nvmf_tcp_req *tcp_req;
2391 
2392 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2393 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2394 		tcp_req = STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2395 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2396 	}
2397 }
2398 
2399 static void
2400 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2401 			     struct spdk_nvmf_tcp_qpair *tqpair)
2402 {
2403 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2404 
2405 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2406 
2407 	STAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2408 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2409 }
2410 
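/*
 * Complete the host-visible part of a request: advance sq_head and record it
 * in the completion, move the request to the controller-to-host transfer
 * state, then either queue C2H data PDUs (successful commands with
 * controller-to-host data) or send the capsule response directly.
 */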
2411 static int
2412 request_transfer_out(struct spdk_nvmf_request *req)
2413 {
2414 	struct spdk_nvmf_tcp_req	*tcp_req;
2415 	struct spdk_nvmf_qpair		*qpair;
2416 	struct spdk_nvmf_tcp_qpair	*tqpair;
2417 	struct spdk_nvme_cpl		*rsp;
2418 
2419 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2420 
2421 	qpair = req->qpair;
2422 	rsp = &req->rsp->nvme_cpl;
2423 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2424 
2425 	/* Advance our sq_head pointer */
2426 	if (qpair->sq_head == qpair->sq_head_max) {
2427 		qpair->sq_head = 0;
2428 	} else {
2429 		qpair->sq_head++;
2430 	}
2431 	rsp->sqhd = qpair->sq_head;
2432 
2433 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2434 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2435 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2436 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2437 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2438 	} else {
2439 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2440 	}
2441 
2442 	return 0;
2443 }
2444 
2445 static void
2446 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2447 				   struct spdk_nvmf_tcp_req *tcp_req)
2448 {
2449 	struct nvme_tcp_pdu *pdu;
2450 
2451 	if (tcp_req->req.data_from_pool) {
2452 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2453 		tcp_req->next_expected_r2t_offset = 0;
2454 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2455 	} else {
2456 		pdu = &tqpair->pdu_in_progress;
2457 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2458 			      tqpair);
2459 		/* No need to send an R2T; the data is already contained in the capsule */
2460 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2461 					  0, tcp_req->req.length);
2462 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2463 	}
2464 }
2465 
2466 static void
2467 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2468 				 struct spdk_nvmf_tcp_req *tcp_req)
2469 {
2470 	struct nvme_tcp_pdu *pdu;
2471 	uint32_t plen = 0;
2472 
2473 	pdu = &tqpair->pdu_in_progress;
2474 	plen = pdu->hdr->common.hlen;
2475 
2476 	if (tqpair->host_hdgst_enable) {
2477 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2478 	}
2479 
2480 	if (pdu->hdr->common.plen != plen) {
2481 		tcp_req->has_incapsule_data = true;
2482 	}
2483 }
2484 
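/*
 * Request state machine. Drives a TCP request through buffer allocation,
 * host-to-controller data transfer, execution and completion, looping while
 * back-to-back state changes occur. Returns true if any progress was made.
 */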
2485 static bool
2486 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2487 			  struct spdk_nvmf_tcp_req *tcp_req)
2488 {
2489 	struct spdk_nvmf_tcp_qpair		*tqpair;
2490 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2491 	int					rc;
2492 	enum spdk_nvmf_tcp_req_state		prev_state;
2493 	bool					progress = false;
2494 	struct spdk_nvmf_transport		*transport = &ttransport->transport;
2495 	struct spdk_nvmf_transport_poll_group	*group;
2496 
2497 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2498 	group = &tqpair->group->group;
2499 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2500 
2501 	/* The loop here is to allow for several back-to-back state changes. */
2502 	do {
2503 		prev_state = tcp_req->state;
2504 
2505 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2506 			      tqpair);
2507 
2508 		switch (tcp_req->state) {
2509 		case TCP_REQUEST_STATE_FREE:
2510 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2511 			 * to escape this state. */
2512 			break;
2513 		case TCP_REQUEST_STATE_NEW:
2514 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2515 
2516 			/* copy the cmd from the receive pdu */
2517 			tcp_req->cmd = tqpair->pdu_in_progress.hdr->capsule_cmd.ccsqe;
2518 
2519 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
2520 				tcp_req->req.dif.dif_insert_or_strip = true;
2521 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx;
2522 			}
2523 
2524 			/* The next state transition depends on the data transfer needs of this request. */
2525 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2526 
2527 			/* If no data to transfer, ready to execute. */
2528 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2529 				/* Reset the tqpair receiving pdu state */
2530 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2531 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2532 				break;
2533 			}
2534 
2535 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2536 
2537 			if (!tcp_req->has_incapsule_data) {
2538 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2539 			}
2540 
2541 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2542 			STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
2543 			break;
2544 		case TCP_REQUEST_STATE_NEED_BUFFER:
2545 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2546 
2547 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2548 
2549 			if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
2550 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2551 					      "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
2552 					      tcp_req, tqpair);
2553 				/* This request needs to wait in line to obtain a buffer */
2554 				break;
2555 			}
2556 
2557 			/* Try to get a data buffer */
2558 			rc = spdk_nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
2559 			if (rc < 0) {
2560 				STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
2561 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2562 				/* Reset the tqpair receiving pdu state */
2563 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2564 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2565 				break;
2566 			}
2567 
2568 			if (!tcp_req->req.data) {
2569 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2570 					      tcp_req, tqpair);
2571 				/* No buffers available. */
2572 				break;
2573 			}
2574 
2575 			STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
2576 
2577 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2578 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2579 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2580 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2581 				break;
2582 			}
2583 
2584 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2585 			break;
2586 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2587 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2588 					  (uintptr_t)tcp_req, 0);
2589 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2590 			 * to escape this state. */
2591 			break;
2592 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2593 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2594 
2595 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2596 				assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
2597 				tcp_req->req.length = tcp_req->req.dif.elba_length;
2598 			}
2599 
2600 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2601 			spdk_nvmf_request_exec(&tcp_req->req);
2602 			break;
2603 		case TCP_REQUEST_STATE_EXECUTING:
2604 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2605 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2606 			 * to escape this state. */
2607 			break;
2608 		case TCP_REQUEST_STATE_EXECUTED:
2609 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2610 
2611 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2612 				tcp_req->req.length = tcp_req->req.dif.orig_length;
2613 			}
2614 
2615 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2616 			break;
2617 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2618 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2619 			rc = request_transfer_out(&tcp_req->req);
2620 			assert(rc == 0); /* No good way to handle this currently */
2621 			break;
2622 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2623 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2624 					  (uintptr_t)tcp_req,
2625 					  0);
2626 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2627 			 * to escape this state. */
2628 			break;
2629 		case TCP_REQUEST_STATE_COMPLETED:
2630 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2631 			if (tcp_req->req.data_from_pool) {
2632 				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
2633 			}
2634 			tcp_req->req.length = 0;
2635 			tcp_req->req.iovcnt = 0;
2636 			tcp_req->req.data = NULL;
2637 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2638 			break;
2639 		case TCP_REQUEST_NUM_STATES:
2640 		default:
2641 			assert(0);
2642 			break;
2643 		}
2644 
2645 		if (tcp_req->state != prev_state) {
2646 			progress = true;
2647 		}
2648 	} while (tcp_req->state != prev_state);
2649 
2650 	return progress;
2651 }
2652 
2653 static void
2654 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2655 {
2656 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2657 	int rc;
2658 
2659 	assert(tqpair != NULL);
2660 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2661 
2662 	/* Disconnect the qpair if either of the following holds:
2663 	 * rc < 0: the socket was closed
2664 	 * tqpair state: the tqpair entered the EXITING state due to an internal error
2665 	 */
2666 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2667 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2668 		spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
2669 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
2670 		spdk_poller_unregister(&tqpair->timeout_poller);
2671 		spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
2672 	}
2673 }
2674 
2675 static int
2676 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2677 			     struct spdk_nvmf_qpair *qpair)
2678 {
2679 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2680 	struct spdk_nvmf_tcp_qpair	*tqpair;
2681 	int				rc;
2682 
2683 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2684 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2685 
2686 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2687 				      spdk_nvmf_tcp_sock_cb, tqpair);
2688 	if (rc != 0) {
2689 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2690 			    spdk_strerror(errno), errno);
2691 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2692 		return -1;
2693 	}
2694 
2695 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2696 	if (rc != 0) {
2697 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2698 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2699 		return -1;
2700 	}
2701 
2702 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2703 	if (rc < 0) {
2704 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2705 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2706 		return -1;
2707 	}
2708 
2709 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2710 	if (rc < 0) {
2711 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2712 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2713 		return -1;
2714 	}
2715 
2716 	tqpair->group = tgroup;
2717 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2718 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2719 
2720 	return 0;
2721 }
2722 
2723 static int
2724 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2725 				struct spdk_nvmf_qpair *qpair)
2726 {
2727 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2728 	struct spdk_nvmf_tcp_qpair		*tqpair;
2729 	int				rc;
2730 
2731 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2732 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2733 
2734 	assert(tqpair->group == tgroup);
2735 
2736 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2737 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2738 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2739 	if (rc != 0) {
2740 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2741 			    spdk_strerror(errno), errno);
2742 	}
2743 
2744 	return rc;
2745 }
2746 
2747 static int
2748 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2749 {
2750 	struct spdk_nvmf_tcp_transport *ttransport;
2751 	struct spdk_nvmf_tcp_req *tcp_req;
2752 
2753 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2754 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2755 
2756 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2757 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2758 
2759 	return 0;
2760 }
2761 
2762 static void
2763 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2764 {
2765 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2766 
2767 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2768 }
2769 
2770 static int
2771 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2772 {
2773 	struct spdk_nvmf_tcp_poll_group *tgroup;
2774 	int rc;
2775 	struct spdk_nvmf_request *req, *req_tmp;
2776 	struct spdk_nvmf_tcp_req *tcp_req;
2777 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2778 			struct spdk_nvmf_tcp_transport, transport);
2779 
2780 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2781 
2782 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2783 		return 0;
2784 	}
2785 
2786 	STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
2787 		tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2788 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2789 			break;
2790 		}
2791 	}
2792 
2793 	rc = spdk_sock_group_poll(tgroup->sock_group);
2794 	if (rc < 0) {
2795 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2796 	}
2797 
2798 	return rc;
2799 }
2800 
2801 static int
2802 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2803 			     struct spdk_nvme_transport_id *trid, bool peer)
2804 {
2805 	struct spdk_nvmf_tcp_qpair     *tqpair;
2806 	uint16_t			port;
2807 
2808 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2809 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2810 
2811 	if (peer) {
2812 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2813 		port = tqpair->initiator_port;
2814 	} else {
2815 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2816 		port = tqpair->target_port;
2817 	}
2818 
2819 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2820 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2821 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2822 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2823 	} else {
2824 		return -1;
2825 	}
2826 
2827 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2828 	return 0;
2829 }
2830 
2831 static int
2832 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2833 				   struct spdk_nvme_transport_id *trid)
2834 {
2835 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2836 }
2837 
2838 static int
2839 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2840 				  struct spdk_nvme_transport_id *trid)
2841 {
2842 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2843 }
2844 
2845 static int
2846 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2847 				    struct spdk_nvme_transport_id *trid)
2848 {
2849 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2850 }
2851 
2852 static int
2853 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2854 {
2855 	struct spdk_nvmf_tcp_qpair     *tqpair;
2856 	int rc;
2857 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2858 
2859 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2860 	if (!rc) {
2861 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2862 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2863 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2864 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2865 			      tqpair->max_queue_depth, tqpair);
2866 	}
2867 
2868 	return rc;
2869 
2870 }
2871 
2872 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2873 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2874 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2875 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2876 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2877 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2878 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2879 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2880 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2881 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2882 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2883 
2884 static void
2885 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2886 {
2887 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2888 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2889 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2890 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2891 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2892 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2893 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2894 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2895 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2896 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2897 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2898 }
2899 
2900 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2901 	.type = SPDK_NVME_TRANSPORT_TCP,
2902 	.opts_init = spdk_nvmf_tcp_opts_init,
2903 	.create = spdk_nvmf_tcp_create,
2904 	.destroy = spdk_nvmf_tcp_destroy,
2905 
2906 	.listen = spdk_nvmf_tcp_listen,
2907 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2908 	.accept = spdk_nvmf_tcp_accept,
2909 
2910 	.listener_discover = spdk_nvmf_tcp_discover,
2911 
2912 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2913 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2914 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2915 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2916 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2917 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2918 
2919 	.req_free = spdk_nvmf_tcp_req_free,
2920 	.req_complete = spdk_nvmf_tcp_req_complete,
2921 
2922 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2923 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2924 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2925 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2926 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2927 };
2928 
2929 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2930