xref: /spdk/lib/nvmf/tcp.c (revision ae7b5890ef728af40bd233a5011b924c482603bf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/log.h"
50 #include "spdk_internal/nvme_tcp.h"
51 
52 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
53 
54 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
55 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
56 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data PDUs for each tqpair */
57 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
58 
59 /* SPDK NVMe-oF TCP related structures */
60 enum spdk_nvmf_tcp_req_state {
61 
62 	/* The request is not currently in use */
63 	TCP_REQUEST_STATE_FREE = 0,
64 
65 	/* Initial state when a request is first received */
66 	TCP_REQUEST_STATE_NEW,
67 
68 	/* The request is queued until a data buffer is available. */
69 	TCP_REQUEST_STATE_NEED_BUFFER,
70 
71 	/* The request is currently transferring data from the host to the controller. */
72 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
73 
74 	/* The request is ready to execute at the block device */
75 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
76 
77 	/* The request is currently executing at the block device */
78 	TCP_REQUEST_STATE_EXECUTING,
79 
80 	/* The request finished executing at the block device */
81 	TCP_REQUEST_STATE_EXECUTED,
82 
83 	/* The request is ready to send a completion */
84 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
85 
86 	/* The request is currently transferring final pdus from the controller to the host. */
87 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
88 
89 	/* The request completed and can be marked free. */
90 	TCP_REQUEST_STATE_COMPLETED,
91 
92 	/* Terminator */
93 	TCP_REQUEST_NUM_STATES,
94 };
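/*
 * Typical lifecycle sketch (illustrative only; the exact transitions are
 * driven by spdk_nvmf_tcp_req_process()). For a write that needs an R2T:
 *
 *   FREE -> NEW -> NEED_BUFFER -> TRANSFERRING_HOST_TO_CONTROLLER
 *        -> READY_TO_EXECUTE -> EXECUTING -> EXECUTED -> READY_TO_COMPLETE
 *        -> TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE
 *
 * Reads skip the host-to-controller transfer, and commands with in-capsule
 * data already hold their payload when they reach READY_TO_EXECUTE.
 */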
95 
96 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
97 	"Invalid PDU Header Field",
98 	"PDU Sequence Error",
99 	"Header Digest Error",
100 	"Data Transfer Out of Range",
101 	"R2T Limit Exceeded",
102 	"Unsupported parameter",
103 };
104 
105 #define OBJECT_NVMF_TCP_IO				0x80
106 
107 #define TRACE_GROUP_NVMF_TCP				0x5
108 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
109 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
110 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
111 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
112 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
113 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
114 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
115 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
116 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
117 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
118 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
119 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
120 
121 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
122 {
123 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
124 	spdk_trace_register_description("TCP_REQ_NEW",
125 					TRACE_TCP_REQUEST_STATE_NEW,
126 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
127 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
128 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
129 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
130 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
131 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
132 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
133 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
134 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
135 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
136 	spdk_trace_register_description("TCP_REQ_EXECUTING",
137 					TRACE_TCP_REQUEST_STATE_EXECUTING,
138 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
139 	spdk_trace_register_description("TCP_REQ_EXECUTED",
140 					TRACE_TCP_REQUEST_STATE_EXECUTED,
141 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
142 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
143 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
144 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
145 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
146 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
147 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
148 	spdk_trace_register_description("TCP_REQ_COMPLETED",
149 					TRACE_TCP_REQUEST_STATE_COMPLETED,
150 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
151 	spdk_trace_register_description("TCP_WRITE_START",
152 					TRACE_TCP_FLUSH_WRITEBUF_START,
153 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
154 	spdk_trace_register_description("TCP_WRITE_DONE",
155 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
156 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
157 	spdk_trace_register_description("TCP_READ_DONE",
158 					TRACE_TCP_READ_FROM_SOCKET_DONE,
159 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
160 }
161 
162 struct spdk_nvmf_tcp_req  {
163 	struct spdk_nvmf_request		req;
164 	struct spdk_nvme_cpl			rsp;
165 	struct spdk_nvme_cmd			cmd;
166 
167 	/* In-capsule data buffer */
168 	uint8_t					*buf;
169 
170 	bool					data_from_pool;
171 	bool					has_incapsule_data;
172 
173 	/* transfer_tag */
174 	uint16_t				ttag;
175 
176 	enum spdk_nvmf_tcp_req_state		state;
177 
178 	void					*buffers[SPDK_NVMF_MAX_SGL_ENTRIES];
179 
180 	/*
181 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
182 	 */
183 	uint32_t				next_expected_r2t_offset;
184 	uint32_t				r2tl_remain;
185 
186 	/*
187 	 * c2h_data_offset is used when we send the c2h_data PDU.
188 	 */
189 	uint32_t				c2h_data_offset;
190 	uint32_t				c2h_data_pdu_num;
191 
192 	struct spdk_dif_ctx			dif_ctx;
193 	bool					dif_insert_or_strip;
194 	uint32_t				elba_length;
195 	uint32_t				orig_length;
196 
197 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
198 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
199 };
200 
201 struct spdk_nvmf_tcp_qpair {
202 	struct spdk_nvmf_qpair			qpair;
203 	struct spdk_nvmf_tcp_poll_group		*group;
204 	struct spdk_nvmf_tcp_port		*port;
205 	struct spdk_sock			*sock;
206 	struct spdk_poller			*flush_poller;
207 
208 	enum nvme_tcp_pdu_recv_state		recv_state;
209 	enum nvme_tcp_qpair_state		state;
210 
211 	struct nvme_tcp_pdu			pdu_in_progress;
212 
213 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
214 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
215 
216 	struct nvme_tcp_pdu			*pdu;
217 	struct nvme_tcp_pdu			*pdu_pool;
218 	uint16_t				free_pdu_num;
219 
220 	/* Queues to track the requests in all states */
221 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
222 	/* Number of requests in each state */
223 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
224 
225 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		queued_c2h_data_tcp_req;
226 
227 	uint8_t					cpda;
228 
229 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
230 	 * buffers to be used for in capsule data.
231 	 */
232 	void					*buf;
233 	void					*bufs;
234 	struct spdk_nvmf_tcp_req		*req;
235 	struct spdk_nvmf_tcp_req		*reqs;
236 
237 	bool					host_hdgst_enable;
238 	bool					host_ddgst_enable;
239 
240 
241 	/* The maximum number of I/O outstanding on this connection at one time */
242 	uint16_t				max_queue_depth;
243 
244 
245 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
246 	uint32_t				maxh2cdata;
247 
248 	uint32_t				c2h_data_pdu_cnt;
249 
250 	/* IP address */
251 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
252 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
253 
254 	/* IP port */
255 	uint16_t				initiator_port;
256 	uint16_t				target_port;
257 
258 	/* Timer used to destroy the qpair if the initiator does not close the
259 	 *  connection after a transport error has been detected.
260 	 */
261 	struct spdk_poller			*timeout_poller;
262 
263 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
264 };
265 
266 struct spdk_nvmf_tcp_poll_group {
267 	struct spdk_nvmf_transport_poll_group	group;
268 	struct spdk_sock_group			*sock_group;
269 
270 	/* Requests that are waiting to obtain a data buffer */
271 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		pending_data_buf_queue;
272 
273 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
274 };
275 
276 struct spdk_nvmf_tcp_port {
277 	struct spdk_nvme_transport_id		trid;
278 	struct spdk_sock			*listen_sock;
279 	uint32_t				ref;
280 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
281 };
282 
283 struct spdk_nvmf_tcp_transport {
284 	struct spdk_nvmf_transport		transport;
285 
286 	pthread_mutex_t				lock;
287 
288 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
289 };
290 
291 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
292 				      struct spdk_nvmf_tcp_req *tcp_req);
293 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
294 
295 static void
296 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
297 			    enum spdk_nvmf_tcp_req_state state)
298 {
299 	struct spdk_nvmf_qpair *qpair;
300 	struct spdk_nvmf_tcp_qpair *tqpair;
301 
302 	qpair = tcp_req->req.qpair;
303 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
304 
305 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
306 	tqpair->state_cntr[tcp_req->state]--;
307 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
308 
309 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
310 	tqpair->state_cntr[state]++;
311 
312 	tcp_req->state = state;
313 }
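/*
 * Each tcp_req sits on exactly one state_queue at a time, and
 * state_cntr[state] mirrors the length of that queue, so once the qpair is
 * fully set up the counters sum to its max_queue_depth. A rough usage
 * sketch (hypothetical caller, for illustration only):
 *
 *   spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
 *   assert(tqpair->state_cntr[TCP_REQUEST_STATE_EXECUTING] > 0);
 */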
314 
315 static struct nvme_tcp_pdu *
316 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
317 {
318 	struct nvme_tcp_pdu *pdu;
319 
320 	pdu = TAILQ_FIRST(&tqpair->free_queue);
321 	if (!pdu) {
322 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
323 		abort();
324 		return NULL;
325 	}
326 
327 	tqpair->free_pdu_num--;
328 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
329 	memset(pdu, 0, sizeof(*pdu));
330 	pdu->ref = 1;
331 
332 	return pdu;
333 }
334 
335 static void
336 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
337 {
338 	if (!pdu) {
339 		return;
340 	}
341 
342 	assert(pdu->ref > 0);
343 
344 	pdu->ref--;
345 	if (pdu->ref == 0) {
346 		tqpair->free_pdu_num++;
347 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
348 	}
349 }
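/*
 * The free_queue acts as a small per-qpair PDU pool: spdk_nvmf_tcp_pdu_get()
 * pops an entry and starts it with a reference count of 1, and
 * spdk_nvmf_tcp_pdu_put() returns it to the pool once the count reaches zero.
 * The pool holds roughly one PDU per outstanding request plus
 * NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM entries for C2H data, which is why running
 * out of PDUs is treated as fatal (abort) rather than as ordinary
 * back-pressure.
 */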
350 
351 static struct spdk_nvmf_tcp_req *
352 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
353 {
354 	struct spdk_nvmf_tcp_req *tcp_req;
355 
356 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
357 	if (!tcp_req) {
358 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
359 		return NULL;
360 	}
361 
362 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
363 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
364 	tcp_req->next_expected_r2t_offset = 0;
365 	tcp_req->r2tl_remain = 0;
366 	tcp_req->c2h_data_offset = 0;
367 	tcp_req->has_incapsule_data = false;
368 	tcp_req->dif_insert_or_strip = false;
369 
370 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
371 	return tcp_req;
372 }
373 
374 static void
375 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
376 {
377 	struct spdk_nvmf_tcp_transport *ttransport;
378 
379 	if (!tcp_req) {
380 		return;
381 	}
382 
383 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
384 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
385 				      struct spdk_nvmf_tcp_transport, transport);
386 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
387 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
388 }
389 
390 static int
391 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
392 {
393 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
394 
395 	nvmf_tcp_request_free(tcp_req);
396 
397 	return 0;
398 }
399 
400 static void
401 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
402 				enum spdk_nvmf_tcp_req_state state)
403 {
404 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
405 
406 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
407 		nvmf_tcp_request_free(tcp_req);
408 	}
409 }
410 
411 static void
412 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
413 {
414 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
415 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
416 
417 	/* Free the pdus in the send_queue */
418 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
419 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
420 		/* Also check the PDU type: queued c2h_data PDUs must be subtracted from c2h_data_pdu_cnt */
421 		if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
422 			assert(tqpair->c2h_data_pdu_cnt > 0);
423 			tqpair->c2h_data_pdu_cnt--;
424 		}
425 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
426 	}
427 
428 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->queued_c2h_data_tcp_req, link, req_tmp) {
429 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
430 	}
431 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
432 
433 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
434 
435 	/* Wipe the requests waiting for buffer from the global list */
436 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
437 			   req_tmp) {
438 		TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
439 	}
440 
441 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
442 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
443 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
444 }
445 
446 static void
447 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
448 {
449 	int i;
450 	struct spdk_nvmf_tcp_req *tcp_req;
451 
452 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
453 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
454 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
455 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
456 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->data_from_pool);
457 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
458 		}
459 	}
460 }
461 
462 static void
463 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
464 {
465 	int err = 0;
466 
467 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
468 
469 	spdk_poller_unregister(&tqpair->flush_poller);
470 	spdk_sock_close(&tqpair->sock);
471 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
472 
473 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
474 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
475 			    tqpair->free_pdu_num,
476 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
477 		err++;
478 	}
479 
480 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
481 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
482 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
483 			    tqpair->max_queue_depth);
484 		err++;
485 	}
486 
487 	if (tqpair->c2h_data_pdu_cnt != 0) {
488 		SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair,
489 			    tqpair->c2h_data_pdu_cnt);
490 		err++;
491 	}
492 
493 	if (err > 0) {
494 		nvmf_tcp_dump_qpair_req_contents(tqpair);
495 	}
496 	free(tqpair->pdu);
497 	free(tqpair->pdu_pool);
498 	free(tqpair->req);
499 	free(tqpair->reqs);
500 	spdk_free(tqpair->buf);
501 	spdk_free(tqpair->bufs);
502 	free(tqpair);
503 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
504 }
505 
506 static int
507 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
508 {
509 	struct spdk_nvmf_tcp_transport	*ttransport;
510 
511 	assert(transport != NULL);
512 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
513 
514 	pthread_mutex_destroy(&ttransport->lock);
515 	free(ttransport);
516 	return 0;
517 }
518 
519 static struct spdk_nvmf_transport *
520 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
521 {
522 	struct spdk_nvmf_tcp_transport *ttransport;
523 	uint32_t sge_count;
524 	uint32_t min_shared_buffers;
525 
526 	ttransport = calloc(1, sizeof(*ttransport));
527 	if (!ttransport) {
528 		return NULL;
529 	}
530 
531 	TAILQ_INIT(&ttransport->ports);
532 
533 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
534 
535 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
536 
537 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
538 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
539 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
540 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
541 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
542 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
543 		     opts->max_queue_depth,
544 		     opts->max_io_size,
545 		     opts->max_qpairs_per_ctrlr,
546 		     opts->io_unit_size,
547 		     opts->in_capsule_data_size,
548 		     opts->max_aq_depth,
549 		     opts->num_shared_buffers,
550 		     opts->c2h_success,
551 		     opts->dif_insert_or_strip,
552 		     opts->sock_priority);
553 
554 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
555 		SPDK_ERRLOG("Unsupported socket_priority=%d, the supported range is 0 to %d\n"
556 			    "See 'man 7 socket' (SO_PRIORITY) for details on socket priority values\n",
557 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
558 		free(ttransport);
559 		return NULL;
560 	}
561 
562 	/* I/O unit size cannot be larger than max I/O size */
563 	if (opts->io_unit_size > opts->max_io_size) {
564 		opts->io_unit_size = opts->max_io_size;
565 	}
566 
567 	sge_count = opts->max_io_size / opts->io_unit_size;
568 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
569 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
570 		free(ttransport);
571 		return NULL;
572 	}
573 
574 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
575 	if (min_shared_buffers > opts->num_shared_buffers) {
576 		SPDK_ERRLOG("There are not enough buffers to satisfy "
577 			    "per-poll group caches for each thread. (%" PRIu32 ") "
578 			    "supplied, (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
579 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
580 		spdk_nvmf_tcp_destroy(&ttransport->transport);
581 		return NULL;
582 	}
583 
584 	pthread_mutex_init(&ttransport->lock, NULL);
585 
586 	return &ttransport->transport;
587 }
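/*
 * A quick numeric sketch of the checks above (values are only an example,
 * not defaults): with max_io_size = 131072 and io_unit_size = 8192, each
 * I/O needs 131072 / 8192 = 16 SGL entries, which must not exceed
 * SPDK_NVMF_MAX_SGL_ENTRIES. Likewise, with 4 SPDK threads and
 * buf_cache_size = 32, at least 4 * 32 = 128 shared buffers must be
 * configured or spdk_nvmf_tcp_create() fails.
 */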
588 
589 static int
590 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
591 {
592 	unsigned long long ull;
593 	char *end = NULL;
594 
595 	ull = strtoull(trsvcid, &end, 10);
596 	if (end == NULL || end == trsvcid || *end != '\0') {
597 		return -1;
598 	}
599 
600 	/* Valid TCP/IP port numbers are in [0, 65535] */
601 	if (ull > 65535) {
602 		return -1;
603 	}
604 
605 	return (int)ull;
606 }
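/*
 * For illustration: _spdk_nvmf_tcp_trsvcid_to_int("4420") returns 4420,
 * while "4420x", "" and "70000" all return -1, so callers only need a
 * single "< 0" check to reject malformed service IDs.
 */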
607 
608 /**
609  * Canonicalize a listen address trid.
610  */
611 static int
612 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
613 				 const struct spdk_nvme_transport_id *trid)
614 {
615 	int trsvcid_int;
616 
617 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
618 	if (trsvcid_int < 0) {
619 		return -EINVAL;
620 	}
621 
622 	memset(canon_trid, 0, sizeof(*canon_trid));
623 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
624 	canon_trid->adrfam = trid->adrfam;
625 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
626 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
627 
628 	return 0;
629 }
630 
631 /**
632  * Find an existing listening port.
633  *
634  * Caller must hold ttransport->lock.
635  */
636 static struct spdk_nvmf_tcp_port *
637 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
638 			 const struct spdk_nvme_transport_id *trid)
639 {
640 	struct spdk_nvme_transport_id canon_trid;
641 	struct spdk_nvmf_tcp_port *port;
642 
643 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
644 		return NULL;
645 	}
646 
647 	TAILQ_FOREACH(port, &ttransport->ports, link) {
648 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
649 			return port;
650 		}
651 	}
652 
653 	return NULL;
654 }
655 
656 static int
657 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
658 		     const struct spdk_nvme_transport_id *trid)
659 {
660 	struct spdk_nvmf_tcp_transport *ttransport;
661 	struct spdk_nvmf_tcp_port *port;
662 	int trsvcid_int;
663 	uint8_t adrfam;
664 
665 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
666 
667 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
668 	if (trsvcid_int < 0) {
669 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
670 		return -EINVAL;
671 	}
672 
673 	pthread_mutex_lock(&ttransport->lock);
674 
675 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
676 	if (port) {
677 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
678 			      trid->traddr, trid->trsvcid);
679 		port->ref++;
680 		pthread_mutex_unlock(&ttransport->lock);
681 		return 0;
682 	}
683 
684 	port = calloc(1, sizeof(*port));
685 	if (!port) {
686 		SPDK_ERRLOG("Port allocation failed\n");
687 		free(port);
688 		pthread_mutex_unlock(&ttransport->lock);
689 		return -ENOMEM;
690 	}
691 
692 	port->ref = 1;
693 
694 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
695 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
696 			    trid->traddr, trid->trsvcid);
697 		free(port);
698 		pthread_mutex_unlock(&ttransport->lock);
699 		return -ENOMEM;
700 	}
701 
702 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
703 	if (port->listen_sock == NULL) {
704 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
705 			    trid->traddr, trsvcid_int,
706 			    spdk_strerror(errno), errno);
707 		free(port);
708 		pthread_mutex_unlock(&ttransport->lock);
709 		return -errno;
710 	}
711 
712 	if (spdk_sock_is_ipv4(port->listen_sock)) {
713 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
714 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
715 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
716 	} else {
717 		SPDK_ERRLOG("Unhandled socket type\n");
718 		adrfam = 0;
719 	}
720 
721 	if (adrfam != trid->adrfam) {
722 		SPDK_ERRLOG("Socket address family mismatch\n");
723 		spdk_sock_close(&port->listen_sock);
724 		free(port);
725 		pthread_mutex_unlock(&ttransport->lock);
726 		return -EINVAL;
727 	}
728 
729 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
730 		       trid->traddr, trsvcid_int);
731 
732 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
733 	pthread_mutex_unlock(&ttransport->lock);
734 
735 	return 0;
736 }
737 
738 static int
739 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
740 			  const struct spdk_nvme_transport_id *trid)
741 {
742 	struct spdk_nvmf_tcp_transport *ttransport;
743 	struct spdk_nvmf_tcp_port *port;
744 	int rc;
745 
746 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
747 
748 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
749 		      trid->traddr, trid->trsvcid);
750 
751 	pthread_mutex_lock(&ttransport->lock);
752 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
753 	if (port) {
754 		assert(port->ref > 0);
755 		port->ref--;
756 		if (port->ref == 0) {
757 			TAILQ_REMOVE(&ttransport->ports, port, link);
758 			spdk_sock_close(&port->listen_sock);
759 			free(port);
760 		}
761 		rc = 0;
762 	} else {
763 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
764 		rc = -ENOENT;
765 	}
766 	pthread_mutex_unlock(&ttransport->lock);
767 
768 	return rc;
769 }
770 
771 static int
772 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
773 {
774 	const int array_size = 32;
775 	struct iovec iovs[array_size];
776 	int iovcnt = 0;
777 	int bytes = 0;
778 	int total_length = 0;
779 	uint32_t mapped_length;
780 	struct nvme_tcp_pdu *pdu;
781 	int pdu_length;
782 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
783 
784 	pdu = TAILQ_FIRST(&tqpair->send_queue);
785 
786 	if (pdu == NULL) {
787 		return 0;
788 	}
789 
790 	/*
791 	 * Build up a list of iovecs for the first few PDUs in the
792 	 *  tqpair's send_queue.
793 	 */
794 	while (pdu != NULL && ((array_size - iovcnt) >= 3)) {
795 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
796 					      array_size - iovcnt,
797 					      pdu,
798 					      tqpair->host_hdgst_enable,
799 					      tqpair->host_ddgst_enable,
800 					      &mapped_length);
801 		total_length += mapped_length;
802 		pdu = TAILQ_NEXT(pdu, tailq);
803 	}
804 
805 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
806 
807 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
808 	if (bytes == -1) {
809 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
810 			return 1;
811 		} else {
812 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
813 				    errno, spdk_strerror(errno));
814 			return -1;
815 		}
816 	}
817 
818 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
819 
820 	pdu = TAILQ_FIRST(&tqpair->send_queue);
821 
822 	/*
823 	 * Free any PDUs that were fully written.  If a PDU was only
824 	 *  partially written, update its writev_offset so that next
825 	 *  time only the unwritten portion will be sent to writev().
826 	 */
827 	TAILQ_INIT(&completed_pdus_list);
828 	while (bytes > 0) {
829 		pdu_length = pdu->hdr.common.plen - pdu->writev_offset;
830 		if (bytes >= pdu_length) {
831 			bytes -= pdu_length;
832 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
833 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
834 			pdu = TAILQ_FIRST(&tqpair->send_queue);
835 
836 		} else {
837 			pdu->writev_offset += bytes;
838 			bytes = 0;
839 		}
840 	}
841 
842 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
843 		pdu = TAILQ_FIRST(&completed_pdus_list);
844 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
845 		assert(pdu->cb_fn != NULL);
846 		pdu->cb_fn(pdu->cb_arg);
847 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
848 	}
849 
850 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
851 }
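/*
 * Sketch of the partial-write bookkeeping above (sizes are hypothetical):
 * if the send_queue holds a 24-byte capsule response followed by an
 * 8216-byte C2H data PDU and spdk_sock_writev() reports 100 bytes written,
 * the response PDU is completed and its cb_fn runs, while the C2H PDU
 * records writev_offset = 76 so that the next flush builds its iovecs
 * starting 76 bytes into that PDU.
 */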
852 
853 static int
854 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
855 {
856 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
857 	int rc;
858 
859 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
860 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
861 		if (rc == 0 && tqpair->flush_poller != NULL) {
862 			spdk_poller_unregister(&tqpair->flush_poller);
863 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
864 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
865 					       tqpair, 50);
866 		}
867 	} else {
868 		/*
869 		 * If the tqpair state is not RUNNING, then
870 		 * keep trying to flush PDUs until our list is
871 		 * empty - to make sure all data is sent before
872 		 * closing the connection.
873 		 */
874 		do {
875 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
876 		} while (rc == 1);
877 	}
878 
879 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
880 		/*
881 		 * If the poller has already started destruction of the tqpair,
882 		 *  i.e. the socket read failed, then the connection state may already
883 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
884 		 */
885 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
886 	}
887 
888 	return -1;
889 }
890 
891 static void
892 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
893 			      struct nvme_tcp_pdu *pdu,
894 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
895 			      void *cb_arg)
896 {
897 	int enable_digest;
898 	int hlen;
899 	uint32_t crc32c;
900 
901 	hlen = pdu->hdr.common.hlen;
902 	enable_digest = 1;
903 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
904 	    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
905 		/* this PDU should be sent without digest */
906 		enable_digest = 0;
907 	}
908 
909 	/* Header Digest */
910 	if (enable_digest && tqpair->host_hdgst_enable) {
911 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
912 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
913 	}
914 
915 	/* Data Digest */
916 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
917 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
918 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
919 	}
920 
921 	pdu->cb_fn = cb_fn;
922 	pdu->cb_arg = cb_arg;
923 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
924 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
925 }
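/*
 * Note on the digest layout assumed above: the header digest is a 4-byte
 * CRC32C written immediately after the PDU header (at hdr.raw + hlen), and
 * the data digest covers the PDU data and is transmitted after it.
 * IC_RESP and C2H_TERM_REQ PDUs are always sent without digests,
 * regardless of what the host negotiated.
 */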
926 
927 static int
928 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
929 {
930 	int i;
931 	struct spdk_nvmf_tcp_req *tcp_req;
932 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
933 	uint32_t in_capsule_data_size;
934 
935 	in_capsule_data_size = transport->opts.in_capsule_data_size;
936 	if (transport->opts.dif_insert_or_strip) {
937 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
938 	}
939 
940 	if (!tqpair->qpair.sq_head_max) {
941 		tqpair->req = calloc(1, sizeof(*tqpair->req));
942 		if (!tqpair->req) {
943 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
944 			return -1;
945 		}
946 
947 		if (in_capsule_data_size) {
948 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
949 						   NULL, SPDK_ENV_LCORE_ID_ANY,
950 						   SPDK_MALLOC_DMA);
951 			if (!tqpair->buf) {
952 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
953 				return -1;
954 			}
955 		}
956 
957 		tcp_req = tqpair->req;
958 		tcp_req->ttag = 0;
959 		tcp_req->req.qpair = &tqpair->qpair;
960 
961 		/* Set up memory to receive commands */
962 		if (tqpair->buf) {
963 			tcp_req->buf = tqpair->buf;
964 		}
965 
966 		/* Set the cmd and rsp */
967 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
968 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
969 
970 		/* Initialize request state to FREE */
971 		tcp_req->state = TCP_REQUEST_STATE_FREE;
972 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
973 
974 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
975 		if (!tqpair->pdu) {
976 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
977 			return -1;
978 		}
979 
980 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
981 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
982 		}
983 
984 	} else {
985 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
986 		if (!tqpair->reqs) {
987 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
988 			return -1;
989 		}
990 
991 		if (in_capsule_data_size) {
992 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
993 						    NULL, SPDK_ENV_LCORE_ID_ANY,
994 						    SPDK_MALLOC_DMA);
995 			if (!tqpair->bufs) {
996 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
997 				return -1;
998 			}
999 		}
1000 
1001 		for (i = 0; i < size; i++) {
1002 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1003 
1004 			tcp_req->ttag = i + 1;
1005 			tcp_req->req.qpair = &tqpair->qpair;
1006 
1007 			/* Set up memory to receive commands */
1008 			if (tqpair->bufs) {
1009 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1010 			}
1011 
1012 			/* Set the cmd and rsp */
1013 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1014 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1015 
1016 			/* Initialize request state to FREE */
1017 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1018 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1019 		}
1020 
1021 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1022 		if (!tqpair->pdu_pool) {
1023 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
1024 			return -1;
1025 		}
1026 
1027 		for (i = 0; i < size; i++) {
1028 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1029 		}
1030 	}
1031 
1032 	return 0;
1033 }
1034 
1035 static int
1036 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1037 {
1038 	struct spdk_nvmf_tcp_qpair *tqpair;
1039 	int i;
1040 
1041 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1042 
1043 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1044 
1045 	TAILQ_INIT(&tqpair->send_queue);
1046 	TAILQ_INIT(&tqpair->free_queue);
1047 	TAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1048 
1049 	/* Initialise request state queues of the qpair */
1050 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1051 		TAILQ_INIT(&tqpair->state_queue[i]);
1052 	}
1053 
1054 	tqpair->host_hdgst_enable = true;
1055 	tqpair->host_ddgst_enable = true;
1056 
1057 	return 0;
1058 }
1059 
1060 static int
1061 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1062 {
1063 
1064 	int rc;
1065 	int buf_size;
1066 
1067 	/* set recv buffer size */
1068 	buf_size = 2 * 1024 * 1024;
1069 	rc = spdk_sock_set_recvbuf(tqpair->sock, buf_size);
1070 	if (rc != 0) {
1071 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
1072 		return rc;
1073 	}
1074 
1075 	/* set send buffer size */
1076 	rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size);
1077 	if (rc != 0) {
1078 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
1079 		return rc;
1080 	}
1081 
1082 	/* set low water mark */
1083 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1084 	if (rc != 0) {
1085 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1086 		return rc;
1087 	}
1088 
1089 	return 0;
1090 }
1091 
1092 static void
1093 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1094 			      struct spdk_nvmf_tcp_port *port,
1095 			      struct spdk_sock *sock, new_qpair_fn cb_fn)
1096 {
1097 	struct spdk_nvmf_tcp_qpair *tqpair;
1098 	int rc;
1099 
1100 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1101 		      port->trid.traddr, port->trid.trsvcid);
1102 
1103 	if (transport->opts.sock_priority) {
1104 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1105 		if (rc) {
1106 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1107 			spdk_sock_close(&sock);
1108 			return;
1109 		}
1110 	}
1111 
1112 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1113 	if (tqpair == NULL) {
1114 		SPDK_ERRLOG("Could not allocate new connection.\n");
1115 		spdk_sock_close(&sock);
1116 		return;
1117 	}
1118 
1119 	tqpair->sock = sock;
1120 	tqpair->max_queue_depth = 1;
1121 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1122 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1123 	tqpair->port = port;
1124 	tqpair->qpair.transport = transport;
1125 
1126 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1127 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1128 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1129 			       &tqpair->initiator_port);
1130 	if (rc < 0) {
1131 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1132 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1133 		return;
1134 	}
1135 
1136 	cb_fn(&tqpair->qpair);
1137 }
1138 
1139 static void
1140 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1141 			  new_qpair_fn cb_fn)
1142 {
1143 	struct spdk_sock *sock;
1144 	int i;
1145 
1146 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1147 		sock = spdk_sock_accept(port->listen_sock);
1148 		if (sock) {
1149 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn);
1150 		}
1151 	}
1152 }
1153 
1154 static void
1155 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
1156 {
1157 	struct spdk_nvmf_tcp_transport *ttransport;
1158 	struct spdk_nvmf_tcp_port *port;
1159 
1160 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1161 
1162 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1163 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn);
1164 	}
1165 }
1166 
1167 static void
1168 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1169 		       struct spdk_nvme_transport_id *trid,
1170 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1171 {
1172 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1173 	entry->adrfam = trid->adrfam;
1174 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED;
1175 
1176 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1177 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1178 
1179 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1180 }
1181 
1182 static struct spdk_nvmf_transport_poll_group *
1183 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1184 {
1185 	struct spdk_nvmf_tcp_poll_group *tgroup;
1186 
1187 	tgroup = calloc(1, sizeof(*tgroup));
1188 	if (!tgroup) {
1189 		return NULL;
1190 	}
1191 
1192 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1193 	if (!tgroup->sock_group) {
1194 		goto cleanup;
1195 	}
1196 
1197 	TAILQ_INIT(&tgroup->qpairs);
1198 	TAILQ_INIT(&tgroup->pending_data_buf_queue);
1199 
1200 	return &tgroup->group;
1201 
1202 cleanup:
1203 	free(tgroup);
1204 	return NULL;
1205 }
1206 
1207 static struct spdk_nvmf_transport_poll_group *
1208 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1209 {
1210 	struct spdk_nvmf_tcp_qpair *tqpair;
1211 	struct spdk_sock_group *group = NULL;
1212 	int rc;
1213 
1214 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1215 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1216 	if (!rc && group != NULL) {
1217 		return spdk_sock_group_get_ctx(group);
1218 	}
1219 
1220 	return NULL;
1221 }
1222 
1223 static void
1224 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1225 {
1226 	struct spdk_nvmf_tcp_poll_group *tgroup;
1227 
1228 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1229 	spdk_sock_group_close(&tgroup->sock_group);
1230 
1231 	if (!TAILQ_EMPTY(&tgroup->pending_data_buf_queue)) {
1232 		SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n");
1233 	}
1234 
1235 	free(tgroup);
1236 }
1237 
1238 static void
1239 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1240 				   enum nvme_tcp_pdu_recv_state state)
1241 {
1242 	if (tqpair->recv_state == state) {
1243 		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state (%d) to be set\n",
1244 			    tqpair, state);
1245 		return;
1246 	}
1247 
1248 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1249 	tqpair->recv_state = state;
1250 
1251 	switch (state) {
1252 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1253 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1254 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1255 		break;
1256 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1257 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1258 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1259 		break;
1260 	default:
1261 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1262 		abort();
1263 		break;
1264 	}
1265 }
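/*
 * The receive side is a small state machine, roughly:
 * AWAIT_PDU_READY -> AWAIT_PDU_CH (common header) -> AWAIT_PDU_PSH
 * (PDU-specific header) -> AWAIT_PDU_PAYLOAD, then back to AWAIT_PDU_READY
 * for the next PDU; ERROR parks the qpair until it is torn down. Only the
 * READY and ERROR transitions reset pdu_in_progress, since the other states
 * are still accumulating data into it.
 */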
1266 
1267 static int
1268 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1269 {
1270 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1271 
1272 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1273 
1274 	SPDK_ERRLOG("No PDU received for tqpair=%p within %d seconds\n", tqpair,
1275 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1276 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
1277 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
1278 	spdk_poller_unregister(&tqpair->timeout_poller);
1279 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
1280 
1281 	return 0;
1282 }
1283 
1284 static void
1285 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1286 {
1287 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1288 
1289 	if (!tqpair->timeout_poller) {
1290 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1291 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1292 	}
1293 }
1294 
1295 static void
1296 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1297 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1298 {
1299 	struct nvme_tcp_pdu *rsp_pdu;
1300 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1301 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1302 	uint32_t copy_len;
1303 
1304 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1305 	if (!rsp_pdu) {
1306 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1307 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1308 		return;
1309 	}
1310 
1311 	c2h_term_req = &rsp_pdu->hdr.term_req;
1312 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1313 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1314 
1315 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1316 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1317 		DSET32(&c2h_term_req->fei, error_offset);
1318 	}
1319 
1320 	copy_len = pdu->hdr.common.hlen;
1321 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1322 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1323 	}
1324 
1325 	/* Copy the error info into the buffer */
1326 	memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
1327 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);
1328 
1329 	/* The c2h_term_req PDU carries a copy of the offending PDU's header */
1330 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1331 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1332 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1333 }
1334 
1335 static void
1336 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1337 				     struct spdk_nvmf_tcp_qpair *tqpair,
1338 				     struct nvme_tcp_pdu *pdu)
1339 {
1340 	struct spdk_nvmf_tcp_req *tcp_req;
1341 
1342 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1343 	if (!tcp_req) {
1344 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1345 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1346 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1347 		return;
1348 	}
1349 
1350 	pdu->ctx = tcp_req;
1351 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1352 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1353 	return;
1354 }
1355 
1356 static void
1357 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1358 		struct spdk_nvmf_tcp_qpair *tqpair,
1359 		struct nvme_tcp_pdu *pdu)
1360 {
1361 	struct spdk_nvmf_tcp_req *tcp_req;
1362 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1363 	uint32_t error_offset = 0;
1364 	enum spdk_nvme_tcp_term_req_fes fes;
1365 
1366 	capsule_cmd = &pdu->hdr.capsule_cmd;
1367 	tcp_req = pdu->ctx;
1368 	assert(tcp_req != NULL);
1369 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1370 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1371 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1372 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1373 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1374 		goto err;
1375 	}
1376 
1377 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1378 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1379 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1380 
1381 	return;
1382 err:
1383 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1384 }
1385 
1386 static void
1387 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1388 				  struct spdk_nvmf_tcp_qpair *tqpair,
1389 				  struct nvme_tcp_pdu *pdu)
1390 {
1391 	struct spdk_nvmf_tcp_req *tcp_req;
1392 	uint32_t error_offset = 0;
1393 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1394 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1395 	bool ttag_offset_error = false;
1396 
1397 	h2c_data = &pdu->hdr.h2c_data;
1398 
1399 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1400 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1401 
1402 	/* Use the information in the PDU to find the matching request */
1403 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1404 		      state_link) {
1405 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1406 			break;
1407 		}
1408 
1409 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1410 			ttag_offset_error = true;
1411 		}
1412 	}
1413 
1414 	if (!tcp_req) {
1415 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1416 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1417 		if (!ttag_offset_error) {
1418 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1419 		} else {
1420 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1421 		}
1422 		goto err;
1423 	}
1424 
1425 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1426 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1427 			      "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset=%u\n",
1428 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1429 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1430 		goto err;
1431 	}
1432 
1433 	if (h2c_data->datal > tqpair->maxh2cdata) {
1434 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datal=%u exceeds maxh2cdata size=%u\n",
1435 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1436 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1437 		goto err;
1438 	}
1439 
1440 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1441 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1442 			      "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1443 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1444 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1445 		goto err;
1446 	}
1447 
1448 	pdu->ctx = tcp_req;
1449 
1450 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
1451 		pdu->dif_ctx = &tcp_req->dif_ctx;
1452 	}
1453 
1454 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1455 				  h2c_data->datao, h2c_data->datal);
1456 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1457 	return;
1458 
1459 err:
1460 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1461 }
1462 
1463 static void
1464 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1465 {
1466 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1467 	nvmf_tcp_request_free(tcp_req);
1468 }
1469 
1470 static void
1471 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1472 				    struct spdk_nvmf_tcp_qpair *tqpair)
1473 {
1474 	struct nvme_tcp_pdu *rsp_pdu;
1475 	struct spdk_nvme_tcp_rsp *capsule_resp;
1476 
1477 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1478 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1479 	if (!rsp_pdu) {
1480 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1481 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1482 		return;
1483 	}
1484 
1485 	capsule_resp = &rsp_pdu->hdr.capsule_resp;
1486 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1487 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1488 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1489 	if (tqpair->host_hdgst_enable) {
1490 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1491 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1492 	}
1493 
1494 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1495 }
1496 
1497 static void
1498 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1499 {
1500 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1501 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1502 					     struct spdk_nvmf_tcp_qpair, qpair);
1503 
1504 	assert(tqpair != NULL);
1505 	assert(tcp_req->c2h_data_pdu_num > 0);
1506 	tcp_req->c2h_data_pdu_num--;
1507 	if (!tcp_req->c2h_data_pdu_num) {
1508 		if (tqpair->qpair.transport->opts.c2h_success) {
1509 			nvmf_tcp_request_free(tcp_req);
1510 		} else {
1511 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1512 		}
1513 	}
1514 
1515 	tqpair->c2h_data_pdu_cnt--;
1516 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1517 }
1518 
1519 static void
1520 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1521 			   struct spdk_nvmf_tcp_req *tcp_req)
1522 {
1523 	struct nvme_tcp_pdu *rsp_pdu;
1524 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1525 
1526 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1527 	if (!rsp_pdu) {
1528 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1529 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1530 		return;
1531 	}
1532 
1533 	r2t = &rsp_pdu->hdr.r2t;
1534 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1535 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1536 
1537 	if (tqpair->host_hdgst_enable) {
1538 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1539 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1540 	}
1541 
1542 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1543 	r2t->ttag = tcp_req->ttag;
1544 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1545 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1546 	tcp_req->r2tl_remain = r2t->r2tl;
1547 
1548 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1549 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1550 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1551 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1552 }
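/*
 * Example of the R2T sizing above (hypothetical numbers): for a 256 KiB
 * write with maxh2cdata = 128 KiB, the first R2T requests offset 0,
 * length 131072. Once the matching H2C data has arrived,
 * spdk_nvmf_tcp_h2c_data_payload_handle() issues a second R2T for offset
 * 131072 covering the remaining 131072 bytes, and only then does the
 * request move to READY_TO_EXECUTE.
 */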
1553 
1554 static void
1555 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1556 				      struct spdk_nvmf_tcp_qpair *tqpair,
1557 				      struct nvme_tcp_pdu *pdu)
1558 {
1559 	struct spdk_nvmf_tcp_req *tcp_req;
1560 
1561 	tcp_req = pdu->ctx;
1562 	assert(tcp_req != NULL);
1563 
1564 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1565 
1566 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1567 	tcp_req->r2tl_remain -= pdu->data_len;
1568 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1569 
1570 	if (!tcp_req->r2tl_remain) {
1571 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1572 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1573 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1574 		} else {
1575 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1576 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1577 		}
1578 	}
1579 }
1580 
1581 static void
1582 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1583 {
1584 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1585 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1586 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1587 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1588 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1589 			      DGET32(h2c_term_req->fei));
1590 	}
1591 }
1592 
1593 static void
1594 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1595 				      struct nvme_tcp_pdu *pdu)
1596 {
1597 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1598 	uint32_t error_offset = 0;
1599 	enum spdk_nvme_tcp_term_req_fes fes;
1600 
1601 
1602 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1603 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1604 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1605 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1606 		goto end;
1607 	}
1608 
1609 	/* set the data buffer */
1610 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen,
1611 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1612 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1613 	return;
1614 end:
1615 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1616 	return;
1617 }
1618 
1619 static void
1620 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1621 		struct nvme_tcp_pdu *pdu)
1622 {
1623 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1624 
1625 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1626 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1627 	return;
1628 }
1629 
1630 static void
1631 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1632 {
1633 	int rc = 0;
1634 	struct nvme_tcp_pdu *pdu;
1635 	uint32_t crc32c, error_offset = 0;
1636 	enum spdk_nvme_tcp_term_req_fes fes;
1637 	struct spdk_nvmf_tcp_transport *ttransport;
1638 
1639 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1640 	pdu = &tqpair->pdu_in_progress;
1641 
1642 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1643 	/* Check the data digest if it is enabled */
1644 	if (pdu->ddgst_enable) {
1645 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1646 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1647 		if (rc == 0) {
1648 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1649 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1650 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1651 			return;
1652 
1653 		}
1654 	}
1655 
1656 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1657 	switch (pdu->hdr.common.pdu_type) {
1658 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1659 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1660 		break;
1661 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1662 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1663 		break;
1664 
1665 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1666 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1667 		break;
1668 
1669 	default:
1670 		/* The code should never reach here */
1671 		SPDK_ERRLOG("The code should never reach here\n");
1672 		break;
1673 	}
1674 }
1675 
1676 static void
1677 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1678 {
1679 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1680 
1681 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1682 }
1683 
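/*
 * Handle an ICReq PDU: validate the PDU format version, record the host's
 * header/data digest settings and CPDA, choose maxh2cdata, and queue an
 * ICResp. The qpair transitions to RUNNING once the ICResp has been sent.
 */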
1684 static void
1685 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1686 			   struct spdk_nvmf_tcp_qpair *tqpair,
1687 			   struct nvme_tcp_pdu *pdu)
1688 {
1689 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
1690 	struct nvme_tcp_pdu *rsp_pdu;
1691 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1692 	uint32_t error_offset = 0;
1693 	enum spdk_nvme_tcp_term_req_fes fes;
1694 
1695 	/* Only PFV 0 is defined currently */
1696 	if (ic_req->pfv != 0) {
1697 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1698 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1699 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1700 		goto end;
1701 	}
1702 
1703 	/* MAXR2T is 0's based */
1704 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1705 
1706 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1707 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1708 
1709 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1710 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1711 
1712 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1713 	if (!rsp_pdu) {
1714 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1715 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1716 		return;
1717 	}
1718 
1719 	ic_resp = &rsp_pdu->hdr.ic_resp;
1720 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1721 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1722 	ic_resp->pfv = 0;
1723 	ic_resp->cpda = tqpair->cpda;
1724 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1725 				      ttransport->transport.opts.io_unit_size);
1726 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1727 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1728 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1729 
1730 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1731 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1732 
1733 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1734 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1735 	return;
1736 end:
1737 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1738 	return;
1739 }
1740 
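/*
 * Called once the PDU-specific header has been received. Verify the header
 * digest if present, then dispatch to the per-type header handler.
 */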
1741 static void
1742 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1743 {
1744 	struct nvme_tcp_pdu *pdu;
1745 	int rc;
1746 	uint32_t crc32c, error_offset = 0;
1747 	enum spdk_nvme_tcp_term_req_fes fes;
1748 	struct spdk_nvmf_tcp_transport *ttransport;
1749 
1750 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1751 	pdu = &tqpair->pdu_in_progress;
1752 
1753 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1754 		      pdu->hdr.common.pdu_type);
1755 	/* check header digest if needed */
1756 	if (pdu->has_hdgst) {
1757 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1758 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1759 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1760 		if (rc == 0) {
1761 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1762 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1763 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1764 			return;
1765 
1766 		}
1767 	}
1768 
1769 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1770 	switch (pdu->hdr.common.pdu_type) {
1771 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1772 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1773 		break;
1774 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1775 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1776 		break;
1777 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1778 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1779 		break;
1780 
1781 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1782 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1783 		break;
1784 
1785 	default:
1786 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type);
1787 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1788 		error_offset = 1;
1789 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1790 		break;
1791 	}
1792 }
1793 
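/*
 * Validate the common header of the in-progress PDU: PDU type versus qpair
 * state, header length, PDU data offset (PDO) alignment, and PDU length.
 * On success, advance to reading the PDU-specific header; on failure, send
 * a C2H termination request.
 */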
1794 static void
1795 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1796 {
1797 	struct nvme_tcp_pdu *pdu;
1798 	uint32_t error_offset = 0;
1799 	enum spdk_nvme_tcp_term_req_fes fes;
1800 	uint8_t expected_hlen, pdo;
1801 	bool plen_error = false, pdo_error = false;
1802 
1803 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1804 	pdu = &tqpair->pdu_in_progress;
1805 
1806 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1807 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1808 			SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
1809 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1810 			goto err;
1811 		}
1812 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1813 		if (pdu->hdr.common.plen != expected_hlen) {
1814 			plen_error = true;
1815 		}
1816 	} else {
1817 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1818 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
1819 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1820 			goto err;
1821 		}
1822 
1823 		switch (pdu->hdr.common.pdu_type) {
1824 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1825 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1826 			pdo = pdu->hdr.common.pdo;
1827 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1828 				pdo_error = true;
1829 				break;
1830 			}
1831 
1832 			if (pdu->hdr.common.plen < expected_hlen) {
1833 				plen_error = true;
1834 			}
1835 			break;
1836 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1837 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1838 			pdo = pdu->hdr.common.pdo;
1839 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1840 				pdo_error = true;
1841 				break;
1842 			}
1843 			if (pdu->hdr.common.plen < expected_hlen) {
1844 				plen_error = true;
1845 			}
1846 			break;
1847 
1848 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1849 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1850 			if ((pdu->hdr.common.plen <= expected_hlen) ||
1851 			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1852 				plen_error = true;
1853 			}
1854 			break;
1855 
1856 		default:
1857 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
1858 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1859 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1860 			goto err;
1861 		}
1862 	}
1863 
1864 	if (pdu->hdr.common.hlen != expected_hlen) {
1865 		SPDK_ERRLOG("PDU type=0x%02x, Expected header length %u, got %u on tqpair=%p\n",
1866 			    pdu->hdr.common.pdu_type,
1867 			    expected_hlen, pdu->hdr.common.hlen, tqpair);
1868 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1869 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1870 		goto err;
1871 	} else if (pdo_error) {
1872 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1873 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1874 	} else if (plen_error) {
1875 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1876 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1877 		goto err;
1878 	} else {
1879 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1880 		nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
1881 		return;
1882 	}
1883 err:
1884 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1885 }
1886 
1887 static int
1888 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1889 				int read_len)
1890 {
1891 	int rc;
1892 
1893 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1894 				      read_offset, read_len, pdu->dif_ctx);
1895 	if (rc != 0) {
1896 		SPDK_ERRLOG("DIF generate failed\n");
1897 	}
1898 
1899 	return rc;
1900 }
1901 
1902 #define MAX_NVME_TCP_PDU_LOOP_COUNT 32
1903 
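/*
 * Receive-side state machine for a qpair: read the common header, the
 * PDU-specific header, and the payload from the socket, advancing
 * tqpair->recv_state as each piece completes. Loops across back-to-back
 * state changes, bounded by MAX_NVME_TCP_PDU_LOOP_COUNT PDUs per call.
 */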
1904 static int
1905 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
1906 {
1907 	int rc = 0;
1908 	struct nvme_tcp_pdu *pdu;
1909 	enum nvme_tcp_pdu_recv_state prev_state;
1910 	uint32_t data_len, current_pdu_num = 0;
1911 
1912 	/* The loop here is to allow for several back-to-back state changes. */
1913 	do {
1914 		prev_state = tqpair->recv_state;
1915 
1916 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
1917 
1918 		switch (tqpair->recv_state) {
1919 		/* Wait for the common header  */
1920 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1921 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1922 			pdu = &tqpair->pdu_in_progress;
1923 
1924 			rc = nvme_tcp_read_data(tqpair->sock,
1925 						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1926 						(void *)&pdu->hdr.common + pdu->ch_valid_bytes);
1927 			if (rc < 0) {
1928 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
1929 				return NVME_TCP_PDU_FATAL;
1930 			} else if (rc > 0) {
1931 				pdu->ch_valid_bytes += rc;
1932 				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1933 				if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
1934 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1935 				}
1936 			}
1937 
1938 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1939 				return NVME_TCP_PDU_IN_PROGRESS;
1940 			}
1941 
1942 			/* The common header of this PDU has now been read from the socket. */
1943 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
1944 			break;
1945 		/* Wait for the pdu specific header  */
1946 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1947 			pdu = &tqpair->pdu_in_progress;
1948 			rc = nvme_tcp_read_data(tqpair->sock,
1949 						pdu->psh_len - pdu->psh_valid_bytes,
1950 						(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
1951 			if (rc < 0) {
1952 				return NVME_TCP_PDU_FATAL;
1953 			} else if (rc > 0) {
1954 				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
1955 						  0, rc, 0, 0);
1956 				pdu->psh_valid_bytes += rc;
1957 			}
1958 			if (pdu->psh_valid_bytes < pdu->psh_len) {
1959 				return NVME_TCP_PDU_IN_PROGRESS;
1960 			}
1961 
1962 			/* The entire header (CH, PSH, and header digest) of this PDU has now been read from the socket. */
1963 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
1964 			if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
1965 				current_pdu_num++;
1966 			}
1967 			break;
1968 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1969 			pdu = &tqpair->pdu_in_progress;
1970 
1971 			/* Check whether the data length is valid; if not, just return. */
1972 			if (!pdu->data_len) {
1973 				return NVME_TCP_PDU_IN_PROGRESS;
1974 			}
1975 
1976 			data_len = pdu->data_len;
1977 			/* data digest */
1978 			if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
1979 					  tqpair->host_ddgst_enable)) {
1980 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
1981 				pdu->ddgst_enable = true;
1982 			}
1983 
1984 			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
1985 			if (rc < 0) {
1986 				return NVME_TCP_PDU_IN_PROGRESS;
1987 			}
1988 			pdu->readv_offset += rc;
1989 
1990 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
1991 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
1992 				if (rc != 0) {
1993 					return NVME_TCP_PDU_FATAL;
1994 				}
1995 			}
1996 
1997 			if (pdu->readv_offset < data_len) {
1998 				return NVME_TCP_PDU_IN_PROGRESS;
1999 			}
2000 
2001 			/* All of this PDU has now been read from the socket. */
2002 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2003 			current_pdu_num++;
2004 			break;
2005 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2006 			pdu = &tqpair->pdu_in_progress;
2007 			/* Check whether the connection is closed. Only read 1 byte at a time. */
2008 			rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr.common);
2009 			if (rc < 0) {
2010 				return NVME_TCP_PDU_FATAL;
2011 			}
2012 			break;
2013 		default:
2014 			assert(0);
2015 			SPDK_ERRLOG("The code should never reach here\n");
2016 			break;
2017 		}
2018 	} while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT));
2019 
2020 	return rc;
2021 }
2022 
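/*
 * Determine the data transfer direction of a request from its command
 * (fabrics commands use fctype, other commands use the opcode), treating
 * zero-length SGLs and certain admin Get/Set Features commands as no data
 * transfer.
 */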
2023 static enum spdk_nvme_data_transfer
2024 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2025 	enum spdk_nvme_data_transfer xfer;
2026 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2027 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2028 
2029 	/* Figure out data transfer direction */
2030 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2031 	{
2032 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2033 	} else
2034 	{
2035 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2036 
2037 		/* Some admin commands are special cases */
2038 		if ((tcp_req->req.qpair->qid == 0) &&
2039 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2040 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2041 			switch (cmd->cdw10 & 0xff) {
2042 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2043 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2044 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2045 				break;
2046 			default:
2047 				xfer = SPDK_NVME_DATA_NONE;
2048 			}
2049 		}
2050 	}
2051 
2052 	if (xfer == SPDK_NVME_DATA_NONE)
2053 	{
2054 		return xfer;
2055 	}
2056 
2057 	/* Even for commands that may transfer data, they could have specified 0 length.
2058 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2059 	 */
2060 	switch (sgl->generic.type)
2061 	{
2062 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2063 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2064 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2065 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2066 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2067 		if (sgl->unkeyed.length == 0) {
2068 			xfer = SPDK_NVME_DATA_NONE;
2069 		}
2070 		break;
2071 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2072 		if (sgl->keyed.length == 0) {
2073 			xfer = SPDK_NVME_DATA_NONE;
2074 		}
2075 		break;
2076 	}
2077 
2078 	return xfer;
2079 }
2080 
2081 static void
2082 spdk_nvmf_tcp_request_free_buffers(struct spdk_nvmf_tcp_req *tcp_req,
2083 				   struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport)
2084 {
2085 	for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
2086 		assert(tcp_req->buffers[i] != NULL);
2087 		if (group->buf_cache_count < group->buf_cache_size) {
2088 			STAILQ_INSERT_HEAD(&group->buf_cache,
2089 					   (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link);
2090 			group->buf_cache_count++;
2091 		} else {
2092 			spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]);
2093 		}
2094 		tcp_req->req.iov[i].iov_base = NULL;
2095 		tcp_req->buffers[i] = NULL;
2096 		tcp_req->req.iov[i].iov_len = 0;
2097 	}
2098 	tcp_req->data_from_pool = false;
2099 }
2100 
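/*
 * Populate the request's iovecs with data buffers, preferring the poll
 * group's buffer cache and falling back to the shared transport mempool.
 * Returns -ENOMEM (releasing any buffers already taken) if the pool is empty.
 */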
2101 static int
2102 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
2103 			    struct spdk_nvmf_tcp_req *tcp_req, uint32_t length)
2104 {
2105 	void					*buf = NULL;
2106 	uint32_t				i = 0;
2107 	struct spdk_nvmf_tcp_qpair		*tqpair;
2108 	struct spdk_nvmf_transport_poll_group	*group;
2109 
2110 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2111 	group = &tqpair->group->group;
2112 
2113 	tcp_req->req.iovcnt = 0;
2114 	while (length) {
2115 		if (!(STAILQ_EMPTY(&group->buf_cache))) {
2116 			group->buf_cache_count--;
2117 			buf = STAILQ_FIRST(&group->buf_cache);
2118 			STAILQ_REMOVE_HEAD(&group->buf_cache, link);
2119 		} else {
2120 			buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
2121 			if (!buf) {
2122 				goto nomem;
2123 			}
2124 		}
2125 
2126 		tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
2127 							~NVMF_DATA_BUFFER_MASK);
2128 		tcp_req->req.iov[i].iov_len  = spdk_min(length, ttransport->transport.opts.io_unit_size);
2129 		tcp_req->req.iovcnt++;
2130 		tcp_req->buffers[i] = buf;
2131 		length -= tcp_req->req.iov[i].iov_len;
2132 		i++;
2133 	}
2134 
2135 	assert(tcp_req->req.iovcnt <= SPDK_NVMF_MAX_SGL_ENTRIES);
2136 	tcp_req->data_from_pool = true;
2137 	return 0;
2138 
2139 nomem:
2140 	spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2141 	tcp_req->req.iovcnt = 0;
2142 	return -ENOMEM;
2143 }
2144 
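/*
 * Parse the command's SGL descriptor. Transport data block SGLs get data
 * buffers allocated from the pool; data block SGLs with the offset subtype
 * point at in-capsule data. Any other descriptor type is rejected.
 */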
2145 static int
2146 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_transport *ttransport,
2147 			    struct spdk_nvmf_tcp_req *tcp_req)
2148 {
2149 	struct spdk_nvme_cmd			*cmd;
2150 	struct spdk_nvme_cpl			*rsp;
2151 	struct spdk_nvme_sgl_descriptor		*sgl;
2152 	uint32_t				length;
2153 
2154 	cmd = &tcp_req->req.cmd->nvme_cmd;
2155 	rsp = &tcp_req->req.rsp->nvme_cpl;
2156 	sgl = &cmd->dptr.sgl1;
2157 
2158 	length = sgl->unkeyed.length;
2159 
2160 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2161 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2162 		if (length > ttransport->transport.opts.max_io_size) {
2163 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2164 				    length, ttransport->transport.opts.max_io_size);
2165 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2166 			return -1;
2167 		}
2168 
2169 		/* fill request length and populate iovs */
2170 		tcp_req->req.length = length;
2171 
2172 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2173 
2174 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2175 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2176 			tcp_req->elba_length = length;
2177 		}
2178 
2179 		if (spdk_nvmf_tcp_req_fill_iovs(ttransport, tcp_req, length) < 0) {
2180 			/* No available buffers. Queue this request up. */
2181 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2182 				      tcp_req);
2183 			return 0;
2184 		}
2185 
2186 		/* Keep req.data pointing at the first iov for backward compatibility */
2187 		tcp_req->req.data = tcp_req->req.iov[0].iov_base;
2188 
2189 
2190 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2191 			      tcp_req,
2192 			      tcp_req->req.iovcnt, tcp_req->req.data);
2193 
2194 		return 0;
2195 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2196 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2197 		uint64_t offset = sgl->address;
2198 		uint32_t max_len = ttransport->transport.opts.in_capsule_data_size;
2199 
2200 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2201 			      offset, length);
2202 
2203 		if (offset > max_len) {
2204 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2205 				    offset, max_len);
2206 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2207 			return -1;
2208 		}
2209 		max_len -= (uint32_t)offset;
2210 
2211 		if (length > max_len) {
2212 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2213 				    length, max_len);
2214 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2215 			return -1;
2216 		}
2217 
2218 		tcp_req->req.data = tcp_req->buf + offset;
2219 		tcp_req->data_from_pool = false;
2220 		tcp_req->req.length = length;
2221 
2222 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2223 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2224 			tcp_req->elba_length = length;
2225 		}
2226 
2227 		tcp_req->req.iov[0].iov_base = tcp_req->req.data;
2228 		tcp_req->req.iov[0].iov_len = length;
2229 		tcp_req->req.iovcnt = 1;
2230 
2231 		return 0;
2232 	}
2233 
2234 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2235 		    sgl->generic.type, sgl->generic.subtype);
2236 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2237 	return -1;
2238 }
2239 
2240 static int
2241 nvmf_tcp_pdu_verify_dif(struct nvme_tcp_pdu *pdu,
2242 			const struct spdk_dif_ctx *dif_ctx)
2243 {
2244 	struct spdk_dif_error err_blk = {};
2245 	int rc;
2246 
2247 	rc = spdk_dif_verify_stream(pdu->data_iov, pdu->data_iovcnt,
2248 				    0, pdu->data_len, pdu->dif_ctx, &err_blk);
2249 	if (rc != 0) {
2250 		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2251 			    err_blk.err_type, err_blk.err_offset);
2252 	}
2253 
2254 	return rc;
2255 }
2256 
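/*
 * Build and queue one C2H DATA PDU carrying up to NVMF_TCP_PDU_MAX_C2H_DATA_SIZE
 * bytes of the response data, applying padding, digests and optional DIF
 * verification. The LAST_PDU (and optionally SUCCESS) flags are set on the
 * final PDU of the transfer.
 */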
2257 static void
2258 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2259 			    struct spdk_nvmf_tcp_req *tcp_req)
2260 {
2261 	struct nvme_tcp_pdu *rsp_pdu;
2262 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2263 	uint32_t plen, pdo, alignment;
2264 	int rc;
2265 
2266 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2267 
2268 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2269 	assert(rsp_pdu != NULL);
2270 
2271 	c2h_data = &rsp_pdu->hdr.c2h_data;
2272 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2273 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2274 
2275 	if (tqpair->host_hdgst_enable) {
2276 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2277 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2278 	}
2279 
2280 	/* set the psh */
2281 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2282 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2283 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2284 	c2h_data->datao = tcp_req->c2h_data_offset;
2285 
2286 	/* set the padding */
2287 	rsp_pdu->padding_len = 0;
2288 	pdo = plen;
2289 	if (tqpair->cpda) {
2290 		alignment = (tqpair->cpda + 1) << 2;
2291 		if (alignment > plen) {
2292 			rsp_pdu->padding_len = alignment - plen;
2293 			pdo = plen = alignment;
2294 		}
2295 	}
2296 
2297 	c2h_data->common.pdo = pdo;
2298 	plen += c2h_data->datal;
2299 	if (tqpair->host_ddgst_enable) {
2300 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2301 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2302 	}
2303 
2304 	c2h_data->common.plen = plen;
2305 
2306 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2307 		rsp_pdu->dif_ctx = &tcp_req->dif_ctx;
2308 	}
2309 
2310 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2311 				  c2h_data->datao, c2h_data->datal);
2312 
2313 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2314 		rc = nvmf_tcp_pdu_verify_dif(rsp_pdu, rsp_pdu->dif_ctx);
2315 		if (rc != 0) {
2316 			/* A data digest error detected by the NVMe/TCP target is treated as a non-fatal
2317 			 * transport error because its cause lies outside the NVMe/TCP target.
2318 			 *
2319 			 * On the other hand, a DIF check error is treated as a fatal transport error
2320 			 * here because the error is caused by the target itself. A fatal NVMe/TCP
2321 			 * transport error is handled by terminating the connection.
2322 			 */
2323 			tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
2324 			return;
2325 		}
2326 	}
2327 
2328 	tcp_req->c2h_data_offset += c2h_data->datal;
2329 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2330 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2331 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2332 		if (tqpair->qpair.transport->opts.c2h_success) {
2333 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2334 		}
2335 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2336 	}
2337 
2338 	tqpair->c2h_data_pdu_cnt += 1;
2339 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2340 }
2341 
2342 static int
2343 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2344 {
2345 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2346 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2347 }
2348 
2349 static void
2350 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2351 {
2352 	struct spdk_nvmf_tcp_req *tcp_req;
2353 
2354 	while (!TAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2355 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2356 		tcp_req = TAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2357 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2358 	}
2359 }
2360 
2361 static void
2362 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2363 			     struct spdk_nvmf_tcp_qpair *tqpair)
2364 {
2365 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2366 
2367 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2368 
2369 	TAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2370 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2371 }
2372 
2373 static int
2374 request_transfer_out(struct spdk_nvmf_request *req)
2375 {
2376 	struct spdk_nvmf_tcp_req	*tcp_req;
2377 	struct spdk_nvmf_qpair		*qpair;
2378 	struct spdk_nvmf_tcp_qpair	*tqpair;
2379 	struct spdk_nvme_cpl		*rsp;
2380 
2381 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2382 
2383 	qpair = req->qpair;
2384 	rsp = &req->rsp->nvme_cpl;
2385 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2386 
2387 	/* Advance our sq_head pointer */
2388 	if (qpair->sq_head == qpair->sq_head_max) {
2389 		qpair->sq_head = 0;
2390 	} else {
2391 		qpair->sq_head++;
2392 	}
2393 	rsp->sqhd = qpair->sq_head;
2394 
2395 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2396 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2397 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2398 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2399 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2400 	} else {
2401 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2402 	}
2403 
2404 	return 0;
2405 }
2406 
2407 static void
2408 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2409 				   struct spdk_nvmf_tcp_req *tcp_req)
2410 {
2411 	struct nvme_tcp_pdu *pdu;
2412 
2413 	if (tcp_req->data_from_pool) {
2414 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2415 		tcp_req->next_expected_r2t_offset = 0;
2416 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2417 	} else {
2418 		pdu = &tqpair->pdu_in_progress;
2419 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2420 			      tqpair);
2421 		/* No need to send an R2T; the data is contained in the capsule (in-capsule data). */
2422 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2423 					  0, tcp_req->req.length);
2424 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2425 	}
2426 }
2427 
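/*
 * Mark the request as carrying in-capsule data if the capsule PDU's total
 * length exceeds its header (plus header digest) length.
 */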
2428 static void
2429 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2430 				 struct spdk_nvmf_tcp_req *tcp_req)
2431 {
2432 	struct nvme_tcp_pdu *pdu;
2433 	uint32_t plen = 0;
2434 
2435 	pdu = &tqpair->pdu_in_progress;
2436 	plen = pdu->hdr.common.hlen;
2437 
2438 	if (tqpair->host_hdgst_enable) {
2439 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2440 	}
2441 
2442 	if (pdu->hdr.common.plen != plen) {
2443 		tcp_req->has_incapsule_data = true;
2444 	}
2445 }
2446 
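/*
 * Drive a request through its state machine (NEW -> NEED_BUFFER ->
 * optional host-to-controller transfer -> EXECUTE -> COMPLETE), looping
 * while the state keeps changing. Returns true if any transition was made.
 */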
2447 static bool
2448 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2449 			  struct spdk_nvmf_tcp_req *tcp_req)
2450 {
2451 	struct spdk_nvmf_tcp_qpair		*tqpair;
2452 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2453 	int					rc;
2454 	enum spdk_nvmf_tcp_req_state		prev_state;
2455 	bool					progress = false;
2456 	struct spdk_nvmf_transport_poll_group	*group;
2457 
2458 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2459 	group = &tqpair->group->group;
2460 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2461 
2462 	/* The loop here is to allow for several back-to-back state changes. */
2463 	do {
2464 		prev_state = tcp_req->state;
2465 
2466 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2467 			      tqpair);
2468 
2469 		switch (tcp_req->state) {
2470 		case TCP_REQUEST_STATE_FREE:
2471 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2472 			 * to escape this state. */
2473 			break;
2474 		case TCP_REQUEST_STATE_NEW:
2475 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2476 
2477 			/* copy the cmd from the receive pdu */
2478 			tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe;
2479 
2480 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->dif_ctx))) {
2481 				tcp_req->dif_insert_or_strip = true;
2482 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->dif_ctx;
2483 			}
2484 
2485 			/* The next state transition depends on the data transfer needs of this request. */
2486 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2487 
2488 			/* If no data to transfer, ready to execute. */
2489 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2490 				/* Reset the tqpair receiving PDU state */
2491 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2492 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2493 				break;
2494 			}
2495 
2496 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2497 
2498 			if (!tcp_req->has_incapsule_data) {
2499 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2500 			}
2501 
2502 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2503 			TAILQ_INSERT_TAIL(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2504 			break;
2505 		case TCP_REQUEST_STATE_NEED_BUFFER:
2506 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2507 
2508 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2509 
2510 			if (!tcp_req->has_incapsule_data &&
2511 			    (tcp_req != TAILQ_FIRST(&tqpair->group->pending_data_buf_queue))) {
2512 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2513 					      "tcp_req(%p) is not the first element waiting for a buffer on tqpair=%p\n",
2514 					      tcp_req, tqpair);
2515 				/* This request needs to wait in line to obtain a buffer */
2516 				break;
2517 			}
2518 
2519 			/* Try to get a data buffer */
2520 			rc = spdk_nvmf_tcp_req_parse_sgl(ttransport, tcp_req);
2521 			if (rc < 0) {
2522 				TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2523 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2524 				/* Reset the tqpair receiving PDU state */
2525 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2526 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2527 				break;
2528 			}
2529 
2530 			if (!tcp_req->req.data) {
2531 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2532 					      tcp_req, tqpair);
2533 				/* No buffers available. */
2534 				break;
2535 			}
2536 
2537 			TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2538 
2539 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2540 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2541 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2542 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2543 				break;
2544 			}
2545 
2546 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2547 			break;
2548 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2549 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2550 					  (uintptr_t)tcp_req, 0);
2551 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2552 			 * to escape this state. */
2553 			break;
2554 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2555 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2556 
2557 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2558 				assert(tcp_req->elba_length >= tcp_req->req.length);
2559 				tcp_req->orig_length = tcp_req->req.length;
2560 				tcp_req->req.length = tcp_req->elba_length;
2561 			}
2562 
2563 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2564 			spdk_nvmf_request_exec(&tcp_req->req);
2565 			break;
2566 		case TCP_REQUEST_STATE_EXECUTING:
2567 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2568 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2569 			 * to escape this state. */
2570 			break;
2571 		case TCP_REQUEST_STATE_EXECUTED:
2572 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2573 
2574 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2575 				tcp_req->req.length = tcp_req->orig_length;
2576 			}
2577 
2578 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2579 			break;
2580 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2581 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2582 			rc = request_transfer_out(&tcp_req->req);
2583 			assert(rc == 0); /* No good way to handle this currently */
2584 			break;
2585 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2586 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2587 					  (uintptr_t)tcp_req,
2588 					  0);
2589 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2590 			 * to escape this state. */
2591 			break;
2592 		case TCP_REQUEST_STATE_COMPLETED:
2593 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2594 			if (tcp_req->data_from_pool) {
2595 				spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2596 			}
2597 			tcp_req->req.length = 0;
2598 			tcp_req->req.iovcnt = 0;
2599 			tcp_req->req.data = NULL;
2600 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2601 			break;
2602 		case TCP_REQUEST_NUM_STATES:
2603 		default:
2604 			assert(0);
2605 			break;
2606 		}
2607 
2608 		if (tcp_req->state != prev_state) {
2609 			progress = true;
2610 		}
2611 	} while (tcp_req->state != prev_state);
2612 
2613 	return progress;
2614 }
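
/*
 * Socket-group callback: process incoming data on the qpair and tear the
 * qpair down if the socket was closed or the qpair hit a fatal error.
 */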
2615 static void
2616 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2617 {
2618 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2619 	int rc;
2620 
2621 	assert(tqpair != NULL);
2622 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2623 
2624 	/* Check two conditions:
2625 	 * - rc < 0: the socket has been closed
2626 	 * - tqpair state: the tqpair is in the EXITING state due to an internal error
2627 	 */
2628 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2629 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2630 		spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
2631 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
2632 		spdk_poller_unregister(&tqpair->timeout_poller);
2633 		spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
2634 	}
2635 }
2636 
2637 static int
2638 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2639 			     struct spdk_nvmf_qpair *qpair)
2640 {
2641 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2642 	struct spdk_nvmf_tcp_qpair	*tqpair;
2643 	int				rc;
2644 
2645 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2646 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2647 
2648 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2649 				      spdk_nvmf_tcp_sock_cb, tqpair);
2650 	if (rc != 0) {
2651 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2652 			    spdk_strerror(errno), errno);
2653 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2654 		return -1;
2655 	}
2656 
2657 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2658 	if (rc != 0) {
2659 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2660 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2661 		return -1;
2662 	}
2663 
2664 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2665 	if (rc < 0) {
2666 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2667 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2668 		return -1;
2669 	}
2670 
2671 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2672 	if (rc < 0) {
2673 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2674 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2675 		return -1;
2676 	}
2677 
2678 	tqpair->group = tgroup;
2679 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2680 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2681 
2682 	return 0;
2683 }
2684 
2685 static int
2686 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2687 				struct spdk_nvmf_qpair *qpair)
2688 {
2689 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2690 	struct spdk_nvmf_tcp_qpair		*tqpair;
2691 	int				rc;
2692 
2693 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2694 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2695 
2696 	assert(tqpair->group == tgroup);
2697 
2698 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2699 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2700 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2701 	if (rc != 0) {
2702 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2703 			    spdk_strerror(errno), errno);
2704 	}
2705 
2706 	return rc;
2707 }
2708 
2709 static int
2710 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2711 {
2712 	struct spdk_nvmf_tcp_transport *ttransport;
2713 	struct spdk_nvmf_tcp_req *tcp_req;
2714 
2715 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2716 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2717 
2718 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2719 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2720 
2721 	return 0;
2722 }
2723 
2724 static void
2725 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2726 {
2727 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2728 
2729 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2730 }
2731 
2732 static int
2733 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2734 {
2735 	struct spdk_nvmf_tcp_poll_group *tgroup;
2736 	int rc;
2737 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
2738 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2739 			struct spdk_nvmf_tcp_transport, transport);
2740 
2741 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2742 
2743 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2744 		return 0;
2745 	}
2746 
2747 	TAILQ_FOREACH_SAFE(tcp_req, &tgroup->pending_data_buf_queue, link, req_tmp) {
2748 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2749 			break;
2750 		}
2751 	}
2752 
2753 	rc = spdk_sock_group_poll(tgroup->sock_group);
2754 	if (rc < 0) {
2755 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2756 	}
2757 
2758 	return rc;
2759 }
2760 
2761 static int
2762 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2763 			     struct spdk_nvme_transport_id *trid, bool peer)
2764 {
2765 	struct spdk_nvmf_tcp_qpair     *tqpair;
2766 	uint16_t			port;
2767 
2768 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2769 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2770 
2771 	if (peer) {
2772 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2773 		port = tqpair->initiator_port;
2774 	} else {
2775 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2776 		port = tqpair->target_port;
2777 	}
2778 
2779 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2780 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2781 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2782 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2783 	} else {
2784 		return -1;
2785 	}
2786 
2787 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2788 	return 0;
2789 }
2790 
2791 static int
2792 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2793 				   struct spdk_nvme_transport_id *trid)
2794 {
2795 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2796 }
2797 
2798 static int
2799 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2800 				  struct spdk_nvme_transport_id *trid)
2801 {
2802 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2803 }
2804 
2805 static int
2806 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2807 				    struct spdk_nvme_transport_id *trid)
2808 {
2809 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2810 }
2811 
2812 static int
2813 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2814 {
2815 	struct spdk_nvmf_tcp_qpair     *tqpair;
2816 	int rc;
2817 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2818 
2819 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2820 	if (!rc) {
2821 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2822 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2823 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2824 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2825 			      tqpair->max_queue_depth, tqpair);
2826 	}
2827 
2828 	return rc;
2829 
2830 }
2831 
2832 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2833 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2834 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2835 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2836 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2837 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2838 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2839 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2840 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2841 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2842 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2843 
2844 static void
2845 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2846 {
2847 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2848 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2849 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2850 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2851 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2852 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2853 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2854 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2855 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2856 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2857 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2858 }
2859 
2860 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2861 	.type = SPDK_NVME_TRANSPORT_TCP,
2862 	.opts_init = spdk_nvmf_tcp_opts_init,
2863 	.create = spdk_nvmf_tcp_create,
2864 	.destroy = spdk_nvmf_tcp_destroy,
2865 
2866 	.listen = spdk_nvmf_tcp_listen,
2867 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2868 	.accept = spdk_nvmf_tcp_accept,
2869 
2870 	.listener_discover = spdk_nvmf_tcp_discover,
2871 
2872 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2873 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2874 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2875 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2876 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2877 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2878 
2879 	.req_free = spdk_nvmf_tcp_req_free,
2880 	.req_complete = spdk_nvmf_tcp_req_complete,
2881 
2882 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2883 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2884 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2885 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2886 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2887 };
2888 
2889 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2890