xref: /spdk/lib/nvmf/tcp.c (revision 9889ab2dc80e40dae92dcef361d53dcba722043d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/assert.h"
50 #include "spdk_internal/log.h"
51 #include "spdk_internal/nvme_tcp.h"
52 
53 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
54 
55 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
56 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
57 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data PDUs for each tqpair */
58 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
59 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
60 
61 /* SPDK NVMe-oF TCP transport data structures */
62 enum spdk_nvmf_tcp_req_state {
63 
64 	/* The request is not currently in use */
65 	TCP_REQUEST_STATE_FREE = 0,
66 
67 	/* Initial state when request first received */
68 	TCP_REQUEST_STATE_NEW,
69 
70 	/* The request is queued until a data buffer is available. */
71 	TCP_REQUEST_STATE_NEED_BUFFER,
72 
73 	/* The request is currently transferring data from the host to the controller. */
74 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
75 
76 	/* The request is ready to execute at the block device */
77 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
78 
79 	/* The request is currently executing at the block device */
80 	TCP_REQUEST_STATE_EXECUTING,
81 
82 	/* The request finished executing at the block device */
83 	TCP_REQUEST_STATE_EXECUTED,
84 
85 	/* The request is ready to send a completion */
86 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
87 
88 	/* The request is currently transferring final pdus from the controller to the host. */
89 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
90 
91 	/* The request completed and can be marked free. */
92 	TCP_REQUEST_STATE_COMPLETED,
93 
94 	/* Terminator */
95 	TCP_REQUEST_NUM_STATES,
96 };
97 
98 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
99 	"Invalid PDU Header Field",
100 	"PDU Sequence Error",
101 	"Header Digest Error",
102 	"Data Transfer Out of Range",
103 	"R2T Limit Exceeded",
104 	"Unsupported parameter",
105 };
106 
107 #define OBJECT_NVMF_TCP_IO				0x80
108 
109 #define TRACE_GROUP_NVMF_TCP				0x5
110 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
111 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
112 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
113 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
114 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
115 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
116 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
117 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
118 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
119 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
120 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
121 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
122 
123 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
124 {
125 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
126 	spdk_trace_register_description("TCP_REQ_NEW",
127 					TRACE_TCP_REQUEST_STATE_NEW,
128 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
129 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
130 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
131 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
132 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
133 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
134 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
135 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
136 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
137 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
138 	spdk_trace_register_description("TCP_REQ_EXECUTING",
139 					TRACE_TCP_REQUEST_STATE_EXECUTING,
140 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
141 	spdk_trace_register_description("TCP_REQ_EXECUTED",
142 					TRACE_TCP_REQUEST_STATE_EXECUTED,
143 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
144 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
145 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
146 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
147 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
148 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
149 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
150 	spdk_trace_register_description("TCP_REQ_COMPLETED",
151 					TRACE_TCP_REQUEST_STATE_COMPLETED,
152 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
153 	spdk_trace_register_description("TCP_WRITE_START",
154 					TRACE_TCP_FLUSH_WRITEBUF_START,
155 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
156 	spdk_trace_register_description("TCP_WRITE_DONE",
157 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
158 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
159 	spdk_trace_register_description("TCP_READ_DONE",
160 					TRACE_TCP_READ_FROM_SOCKET_DONE,
161 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
162 }
163 
164 struct spdk_nvmf_tcp_req  {
165 	struct spdk_nvmf_request		req;
166 	struct spdk_nvme_cpl			rsp;
167 	struct spdk_nvme_cmd			cmd;
168 
169 	/* In-capsule data buffer */
170 	uint8_t					*buf;
171 
172 	bool					has_incapsule_data;
173 
174 	/* transfer_tag */
175 	uint16_t				ttag;
176 
177 	enum spdk_nvmf_tcp_req_state		state;
178 
179 	/*
180 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
181 	 */
182 	uint32_t				next_expected_r2t_offset;
183 	uint32_t				r2tl_remain;
184 
185 	/*
186 	 * c2h_data_offset is used when we send the c2h_data PDU.
187 	 */
188 	uint32_t				c2h_data_offset;
189 	uint32_t				c2h_data_pdu_num;
190 
191 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
192 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
193 };
194 
195 struct nvme_tcp_pdu_recv_buf {
196 	char					*buf;
197 	uint32_t				off;
198 	uint32_t				size;
199 	uint32_t				remain_size;
200 };
201 
202 struct spdk_nvmf_tcp_qpair {
203 	struct spdk_nvmf_qpair			qpair;
204 	struct spdk_nvmf_tcp_poll_group		*group;
205 	struct spdk_nvmf_tcp_port		*port;
206 	struct spdk_sock			*sock;
207 	struct spdk_poller			*flush_poller;
208 
209 	enum nvme_tcp_pdu_recv_state		recv_state;
210 	enum nvme_tcp_qpair_state		state;
211 
212 	struct nvme_tcp_pdu			pdu_in_progress;
213 	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
214 
215 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
216 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
217 
218 	struct nvme_tcp_pdu			*pdu;
219 	struct nvme_tcp_pdu			*pdu_pool;
220 	uint16_t				free_pdu_num;
221 
222 	/* Queues to track the requests in all states */
223 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
224 	/* Number of requests in each state */
225 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
226 
227 	STAILQ_HEAD(, spdk_nvmf_tcp_req)	queued_c2h_data_tcp_req;
228 
229 	uint8_t					cpda;
230 
231 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
232 	 * buffers to be used for in capsule data.
233 	 */
234 	void					*buf;
235 	void					*bufs;
236 	struct spdk_nvmf_tcp_req		*req;
237 	struct spdk_nvmf_tcp_req		*reqs;
238 
239 	bool					host_hdgst_enable;
240 	bool					host_ddgst_enable;
241 
242 
243 	/* The maximum number of I/O outstanding on this connection at one time */
244 	uint16_t				max_queue_depth;
245 
246 
247 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
248 	uint32_t				maxh2cdata;
249 
250 	uint32_t				c2h_data_pdu_cnt;
251 
252 	/* IP address */
253 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
254 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
255 
256 	/* IP port */
257 	uint16_t				initiator_port;
258 	uint16_t				target_port;
259 
260 	/* Timer used to destroy qpair after detecting transport error issue if initiator does
261 	 *  not close the connection.
262 	 */
263 	struct spdk_poller			*timeout_poller;
264 
265 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
266 };
267 
268 struct spdk_nvmf_tcp_poll_group {
269 	struct spdk_nvmf_transport_poll_group	group;
270 	struct spdk_sock_group			*sock_group;
271 
272 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
273 };
274 
275 struct spdk_nvmf_tcp_port {
276 	struct spdk_nvme_transport_id		trid;
277 	struct spdk_sock			*listen_sock;
278 	uint32_t				ref;
279 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
280 };
281 
282 struct spdk_nvmf_tcp_transport {
283 	struct spdk_nvmf_transport		transport;
284 
285 	pthread_mutex_t				lock;
286 
287 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
288 };
289 
290 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
291 				      struct spdk_nvmf_tcp_req *tcp_req);
292 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
293 
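/* Move a request to a new state: remove it from the per-state queue it is
 * currently on, append it to the destination queue, and keep the per-state
 * counters in sync.
 */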
294 static void
295 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
296 			    enum spdk_nvmf_tcp_req_state state)
297 {
298 	struct spdk_nvmf_qpair *qpair;
299 	struct spdk_nvmf_tcp_qpair *tqpair;
300 
301 	qpair = tcp_req->req.qpair;
302 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
303 
304 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
305 	tqpair->state_cntr[tcp_req->state]--;
306 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
307 
308 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
309 	tqpair->state_cntr[state]++;
310 
311 	tcp_req->state = state;
312 }
313 
314 static struct nvme_tcp_pdu *
315 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
316 {
317 	struct nvme_tcp_pdu *pdu;
318 
319 	pdu = TAILQ_FIRST(&tqpair->free_queue);
320 	if (!pdu) {
321 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
322 		abort();
323 		return NULL;
324 	}
325 
326 	tqpair->free_pdu_num--;
327 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
328 	memset(pdu, 0, sizeof(*pdu));
329 	pdu->qpair = tqpair;
330 	pdu->ref = 1;
331 	pdu->hdr = &pdu->hdr_mem;
332 
333 	return pdu;
334 }
335 
336 static void
337 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
338 {
339 	if (!pdu) {
340 		return;
341 	}
342 
343 	assert(pdu->ref > 0);
344 
345 	pdu->ref--;
346 	if (pdu->ref == 0) {
347 		tqpair->free_pdu_num++;
348 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
349 	}
350 }
351 
352 static struct spdk_nvmf_tcp_req *
353 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
354 {
355 	struct spdk_nvmf_tcp_req *tcp_req;
356 
357 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
358 	if (!tcp_req) {
359 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
360 		return NULL;
361 	}
362 
363 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
364 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
365 	tcp_req->next_expected_r2t_offset = 0;
366 	tcp_req->r2tl_remain = 0;
367 	tcp_req->c2h_data_offset = 0;
368 	tcp_req->has_incapsule_data = false;
369 	memset(&tcp_req->req.dif, 0, sizeof(tcp_req->req.dif));
370 
371 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
372 	return tcp_req;
373 }
374 
375 static void
376 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
377 {
378 	struct spdk_nvmf_tcp_transport *ttransport;
379 
380 	if (!tcp_req) {
381 		return;
382 	}
383 
384 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
385 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
386 				      struct spdk_nvmf_tcp_transport, transport);
387 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
388 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
389 }
390 
391 static int
392 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
393 {
394 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
395 
396 	nvmf_tcp_request_free(tcp_req);
397 
398 	return 0;
399 }
400 
401 static void
402 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
403 				enum spdk_nvmf_tcp_req_state state)
404 {
405 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
406 
407 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
408 		nvmf_tcp_request_free(tcp_req);
409 	}
410 }
411 
412 static void
413 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
414 {
415 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
416 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
417 
418 	/* Free the pdus in the send_queue */
419 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
420 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
421 		/* Also check the pdu type; c2h_data_pdu_cnt must be decremented for each C2H data PDU */
422 		if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
423 			assert(tqpair->c2h_data_pdu_cnt > 0);
424 			tqpair->c2h_data_pdu_cnt--;
425 		}
426 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
427 	}
428 
429 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req)) {
430 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
431 	}
432 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
433 
434 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
435 
436 	/* Wipe the requests waiting for buffer from the global list */
437 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
438 			   req_tmp) {
439 		STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
440 			      spdk_nvmf_request, buf_link);
441 	}
442 
443 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
444 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
445 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
446 }
447 
448 static void
449 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
450 {
451 	int i;
452 	struct spdk_nvmf_tcp_req *tcp_req;
453 
454 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
455 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
456 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
457 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
458 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
459 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
460 		}
461 	}
462 }
463 
464 static void
465 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
466 {
467 	int err = 0;
468 
469 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
470 
471 	spdk_poller_unregister(&tqpair->flush_poller);
472 	spdk_sock_close(&tqpair->sock);
473 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
474 
475 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
476 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
477 			    tqpair->free_pdu_num,
478 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
479 		err++;
480 	}
481 
482 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
483 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
484 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
485 			    tqpair->max_queue_depth);
486 		err++;
487 	}
488 
489 	if (tqpair->c2h_data_pdu_cnt != 0) {
490 		SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair,
491 			    tqpair->c2h_data_pdu_cnt);
492 		err++;
493 	}
494 
495 	if (err > 0) {
496 		nvmf_tcp_dump_qpair_req_contents(tqpair);
497 	}
498 	free(tqpair->pdu);
499 	free(tqpair->pdu_pool);
500 	free(tqpair->req);
501 	free(tqpair->reqs);
502 	spdk_free(tqpair->buf);
503 	spdk_free(tqpair->bufs);
504 	free(tqpair->pdu_recv_buf.buf);
505 	free(tqpair);
506 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
507 }
508 
509 static int
510 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
511 {
512 	struct spdk_nvmf_tcp_transport	*ttransport;
513 
514 	assert(transport != NULL);
515 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
516 
517 	pthread_mutex_destroy(&ttransport->lock);
518 	free(ttransport);
519 	return 0;
520 }
521 
522 static struct spdk_nvmf_transport *
523 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
524 {
525 	struct spdk_nvmf_tcp_transport *ttransport;
526 	uint32_t sge_count;
527 	uint32_t min_shared_buffers;
528 
529 	ttransport = calloc(1, sizeof(*ttransport));
530 	if (!ttransport) {
531 		return NULL;
532 	}
533 
534 	TAILQ_INIT(&ttransport->ports);
535 
536 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
537 
538 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
539 
540 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
541 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
542 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
543 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
544 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
545 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
546 		     opts->max_queue_depth,
547 		     opts->max_io_size,
548 		     opts->max_qpairs_per_ctrlr,
549 		     opts->io_unit_size,
550 		     opts->in_capsule_data_size,
551 		     opts->max_aq_depth,
552 		     opts->num_shared_buffers,
553 		     opts->c2h_success,
554 		     opts->dif_insert_or_strip,
555 		     opts->sock_priority);
556 
557 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
558 		SPDK_ERRLOG("Unsupported socket_priority=%d, the valid range is 0 to %d.\n"
559 			    "Use 'man 7 socket' to view the priority range under the SO_PRIORITY item.\n",
560 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
561 		free(ttransport);
562 		return NULL;
563 	}
564 
565 	/* I/O unit size cannot be larger than max I/O size */
566 	if (opts->io_unit_size > opts->max_io_size) {
567 		opts->io_unit_size = opts->max_io_size;
568 	}
569 
570 	sge_count = opts->max_io_size / opts->io_unit_size;
571 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
572 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
573 		free(ttransport);
574 		return NULL;
575 	}
576 
577 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
578 	if (min_shared_buffers > opts->num_shared_buffers) {
579 		SPDK_ERRLOG("There are not enough buffers to satisfy "
580 			    "per-poll group caches for each thread. (%" PRIu32 ") "
581 			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
582 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
583 		spdk_nvmf_tcp_destroy(&ttransport->transport);
584 		return NULL;
585 	}
586 
587 	pthread_mutex_init(&ttransport->lock, NULL);
588 
589 	return &ttransport->transport;
590 }
591 
592 static int
593 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
594 {
595 	unsigned long long ull;
596 	char *end = NULL;
597 
598 	ull = strtoull(trsvcid, &end, 10);
599 	if (end == NULL || end == trsvcid || *end != '\0') {
600 		return -1;
601 	}
602 
603 	/* Valid TCP/IP port numbers are in [0, 65535] */
604 	if (ull > 65535) {
605 		return -1;
606 	}
607 
608 	return (int)ull;
609 }
610 
611 /**
612  * Canonicalize a listen address trid.
613  */
614 static int
615 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
616 				 const struct spdk_nvme_transport_id *trid)
617 {
618 	int trsvcid_int;
619 
620 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
621 	if (trsvcid_int < 0) {
622 		return -EINVAL;
623 	}
624 
625 	memset(canon_trid, 0, sizeof(*canon_trid));
626 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
627 	canon_trid->adrfam = trid->adrfam;
628 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
629 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
630 
631 	return 0;
632 }
633 
634 /**
635  * Find an existing listening port.
636  *
637  * Caller must hold ttransport->lock.
638  */
639 static struct spdk_nvmf_tcp_port *
640 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
641 			 const struct spdk_nvme_transport_id *trid)
642 {
643 	struct spdk_nvme_transport_id canon_trid;
644 	struct spdk_nvmf_tcp_port *port;
645 
646 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
647 		return NULL;
648 	}
649 
650 	TAILQ_FOREACH(port, &ttransport->ports, link) {
651 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
652 			return port;
653 		}
654 	}
655 
656 	return NULL;
657 }
658 
659 static int
660 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
661 		     const struct spdk_nvme_transport_id *trid)
662 {
663 	struct spdk_nvmf_tcp_transport *ttransport;
664 	struct spdk_nvmf_tcp_port *port;
665 	int trsvcid_int;
666 	uint8_t adrfam;
667 
668 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
669 
670 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
671 	if (trsvcid_int < 0) {
672 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
673 		return -EINVAL;
674 	}
675 
676 	pthread_mutex_lock(&ttransport->lock);
677 
678 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
679 	if (port) {
680 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
681 			      trid->traddr, trid->trsvcid);
682 		port->ref++;
683 		pthread_mutex_unlock(&ttransport->lock);
684 		return 0;
685 	}
686 
687 	port = calloc(1, sizeof(*port));
688 	if (!port) {
689 		SPDK_ERRLOG("Port allocation failed\n");
690 		free(port);
691 		pthread_mutex_unlock(&ttransport->lock);
692 		return -ENOMEM;
693 	}
694 
695 	port->ref = 1;
696 
697 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
698 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
699 			    trid->traddr, trid->trsvcid);
700 		free(port);
701 		pthread_mutex_unlock(&ttransport->lock);
702 		return -ENOMEM;
703 	}
704 
705 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
706 	if (port->listen_sock == NULL) {
707 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
708 			    trid->traddr, trsvcid_int,
709 			    spdk_strerror(errno), errno);
710 		free(port);
711 		pthread_mutex_unlock(&ttransport->lock);
712 		return -errno;
713 	}
714 
715 	if (spdk_sock_is_ipv4(port->listen_sock)) {
716 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
717 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
718 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
719 	} else {
720 		SPDK_ERRLOG("Unhandled socket type\n");
721 		adrfam = 0;
722 	}
723 
724 	if (adrfam != trid->adrfam) {
725 		SPDK_ERRLOG("Socket address family mismatch\n");
726 		spdk_sock_close(&port->listen_sock);
727 		free(port);
728 		pthread_mutex_unlock(&ttransport->lock);
729 		return -EINVAL;
730 	}
731 
732 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
733 		       trid->traddr, trsvcid_int);
734 
735 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
736 	pthread_mutex_unlock(&ttransport->lock);
737 
738 	return 0;
739 }
740 
741 static int
742 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
743 			  const struct spdk_nvme_transport_id *trid)
744 {
745 	struct spdk_nvmf_tcp_transport *ttransport;
746 	struct spdk_nvmf_tcp_port *port;
747 	int rc;
748 
749 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
750 
751 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
752 		      trid->traddr, trid->trsvcid);
753 
754 	pthread_mutex_lock(&ttransport->lock);
755 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
756 	if (port) {
757 		assert(port->ref > 0);
758 		port->ref--;
759 		if (port->ref == 0) {
760 			TAILQ_REMOVE(&ttransport->ports, port, link);
761 			spdk_sock_close(&port->listen_sock);
762 			free(port);
763 		}
764 		rc = 0;
765 	} else {
766 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
767 		rc = -ENOENT;
768 	}
769 	pthread_mutex_unlock(&ttransport->lock);
770 
771 	return rc;
772 }
773 
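/* Gather iovecs for as many queued PDUs as fit, send them with a single
 * writev(), then retire fully written PDUs and record the partial progress
 * of the last one in writev_offset so the next flush resumes where it left off.
 */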
774 static int
775 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
776 {
777 	const int array_size = 32;
778 	struct iovec iovs[array_size];
779 	int iovcnt = 0;
780 	int bytes = 0;
781 	int total_length = 0;
782 	uint32_t mapped_length = 0;
783 	struct nvme_tcp_pdu *pdu;
784 	int pdu_length;
785 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
786 
787 	pdu = TAILQ_FIRST(&tqpair->send_queue);
788 
789 	if (pdu == NULL) {
790 		return 0;
791 	}
792 
793 	/*
794 	 * Build up a list of iovecs for the first few PDUs in the
795 	 *  tqpair's send_queue.
796 	 */
797 	while (pdu != NULL && ((array_size - iovcnt) >= (2 + (int)pdu->data_iovcnt))) {
798 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
799 					      array_size - iovcnt,
800 					      pdu,
801 					      tqpair->host_hdgst_enable,
802 					      tqpair->host_ddgst_enable,
803 					      &mapped_length);
804 		total_length += mapped_length;
805 		pdu = TAILQ_NEXT(pdu, tailq);
806 	}
807 
808 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
809 
810 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
811 	if (bytes == -1) {
812 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
813 			return 1;
814 		} else {
815 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
816 				    errno, spdk_strerror(errno));
817 			return -1;
818 		}
819 	}
820 
821 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
822 
823 	pdu = TAILQ_FIRST(&tqpair->send_queue);
824 
825 	/*
826 	 * Free any PDUs that were fully written.  If a PDU was only
827 	 *  partially written, update its writev_offset so that next
828 	 *  time only the unwritten portion will be sent to writev().
829 	 */
830 	TAILQ_INIT(&completed_pdus_list);
831 	while (bytes > 0) {
832 		pdu_length = pdu->hdr->common.plen - pdu->writev_offset;
833 		if (bytes >= pdu_length) {
834 			bytes -= pdu_length;
835 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
836 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
837 			pdu = TAILQ_FIRST(&tqpair->send_queue);
838 
839 		} else {
840 			pdu->writev_offset += bytes;
841 			bytes = 0;
842 		}
843 	}
844 
845 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
846 		pdu = TAILQ_FIRST(&completed_pdus_list);
847 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
848 		assert(pdu->cb_fn != NULL);
849 		pdu->cb_fn(pdu->cb_arg);
850 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
851 	}
852 
853 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
854 }
855 
856 static int
857 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
858 {
859 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
860 	int rc;
861 
862 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
863 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
864 		if (rc == 0 && tqpair->flush_poller != NULL) {
865 			spdk_poller_unregister(&tqpair->flush_poller);
866 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
867 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
868 					       tqpair, 50);
869 		}
870 	} else {
871 		/*
872 		 * If the tqpair state is not RUNNING, then
873 		 * keep trying to flush PDUs until our list is
874 		 * empty - to make sure all data is sent before
875 		 * closing the connection.
876 		 */
877 		do {
878 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
879 		} while (rc == 1);
880 	}
881 
882 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
883 		/*
884 		 * If the poller has already started destruction of the tqpair,
885 		 *  i.e. the socket read failed, then the connection state may already
886 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
887 		 */
888 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
889 	}
890 
891 	return -1;
892 }
893 
894 static void
895 spdk_nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
896 {
897 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Disconnecting qpair %p\n", tqpair);
898 
899 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
900 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
901 	spdk_poller_unregister(&tqpair->timeout_poller);
902 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
903 }
904 
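/* Compute and append the header/data digests when the host enabled them
 * (IC_RESP and C2H_TERM_REQ PDUs are always sent without digests), queue the
 * PDU on the send queue, and kick the flush logic.
 */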
905 static void
906 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
907 			      struct nvme_tcp_pdu *pdu,
908 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
909 			      void *cb_arg)
910 {
911 	int enable_digest;
912 	int hlen;
913 	uint32_t crc32c;
914 
915 	hlen = pdu->hdr->common.hlen;
916 	enable_digest = 1;
917 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
918 	    pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
919 		/* this PDU should be sent without digest */
920 		enable_digest = 0;
921 	}
922 
923 	/* Header Digest */
924 	if (enable_digest && tqpair->host_hdgst_enable) {
925 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
926 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr->raw + hlen, crc32c);
927 	}
928 
929 	/* Data Digest */
930 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
931 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
932 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
933 	}
934 
935 	pdu->cb_fn = cb_fn;
936 	pdu->cb_arg = cb_arg;
937 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
938 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
939 }
940 
941 static int
942 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
943 {
944 	int i;
945 	struct spdk_nvmf_tcp_req *tcp_req;
946 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
947 	uint32_t in_capsule_data_size;
948 
949 	in_capsule_data_size = transport->opts.in_capsule_data_size;
950 	if (transport->opts.dif_insert_or_strip) {
951 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
952 	}
953 
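	/* When sq_head_max is still zero the queue size has not been established
	 * yet (i.e. before the CONNECT command has set the queue size), so only a
	 * single request and in-capsule data buffer are allocated.  Once the size
	 * is known, resources for the full queue depth are allocated instead.
	 */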
954 	if (!tqpair->qpair.sq_head_max) {
955 		tqpair->req = calloc(1, sizeof(*tqpair->req));
956 		if (!tqpair->req) {
957 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
958 			return -1;
959 		}
960 
961 		if (in_capsule_data_size) {
962 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
963 						   NULL, SPDK_ENV_LCORE_ID_ANY,
964 						   SPDK_MALLOC_DMA);
965 			if (!tqpair->buf) {
966 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
967 				return -1;
968 			}
969 		}
970 
971 		tcp_req = tqpair->req;
972 		tcp_req->ttag = 0;
973 		tcp_req->req.qpair = &tqpair->qpair;
974 
975 		/* Set up memory to receive commands */
976 		if (tqpair->buf) {
977 			tcp_req->buf = tqpair->buf;
978 		}
979 
980 		/* Set the cmd and rsp */
981 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
982 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
983 
984 		/* Initialize request state to FREE */
985 		tcp_req->state = TCP_REQUEST_STATE_FREE;
986 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
987 
988 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
989 		if (!tqpair->pdu) {
990 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
991 			return -1;
992 		}
993 
994 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
995 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
996 		}
997 
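		/* Size the receive buffer to hold a capsule command header, the
		 * in-capsule data, and both header and data digests, multiplied by
		 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR so several PDUs can be buffered
		 * at once.
		 */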
998 		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
999 					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1000 		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
1001 		if (!tqpair->pdu_recv_buf.buf) {
1002 			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
1003 				    tqpair->pdu_recv_buf.size);
1004 			return -1;
1005 		}
1006 		tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
1007 	} else {
1008 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
1009 		if (!tqpair->reqs) {
1010 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
1011 			return -1;
1012 		}
1013 
1014 		if (in_capsule_data_size) {
1015 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
1016 						    NULL, SPDK_ENV_LCORE_ID_ANY,
1017 						    SPDK_MALLOC_DMA);
1018 			if (!tqpair->bufs) {
1019 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1020 				return -1;
1021 			}
1022 		}
1023 
1024 		for (i = 0; i < size; i++) {
1025 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1026 
1027 			tcp_req->ttag = i + 1;
1028 			tcp_req->req.qpair = &tqpair->qpair;
1029 
1030 			/* Set up memory to receive commands */
1031 			if (tqpair->bufs) {
1032 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1033 			}
1034 
1035 			/* Set the cmd and rsp */
1036 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1037 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1038 
1039 			/* Initialize request state to FREE */
1040 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1041 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1042 		}
1043 
1044 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1045 		if (!tqpair->pdu_pool) {
1046 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
1047 			return -1;
1048 		}
1049 
1050 		for (i = 0; i < size; i++) {
1051 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1052 		}
1053 	}
1054 
1055 	return 0;
1056 }
1057 
1058 static int
1059 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1060 {
1061 	struct spdk_nvmf_tcp_qpair *tqpair;
1062 	int i;
1063 
1064 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1065 
1066 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1067 
1068 	TAILQ_INIT(&tqpair->send_queue);
1069 	TAILQ_INIT(&tqpair->free_queue);
1070 	STAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1071 
1072 	/* Initialise request state queues of the qpair */
1073 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1074 		TAILQ_INIT(&tqpair->state_queue[i]);
1075 	}
1076 
1077 	tqpair->host_hdgst_enable = true;
1078 	tqpair->host_ddgst_enable = true;
1079 	return 0;
1080 }
1081 
1082 static int
1083 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1084 {
1085 	int rc;
1086 
1087 	/* set low water mark */
1088 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1089 	if (rc != 0) {
1090 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1091 		return rc;
1092 	}
1093 
1094 	return 0;
1095 }
1096 
1097 static void
1098 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1099 			      struct spdk_nvmf_tcp_port *port,
1100 			      struct spdk_sock *sock,
1101 			      new_qpair_fn cb_fn, void *cb_arg)
1102 {
1103 	struct spdk_nvmf_tcp_qpair *tqpair;
1104 	int rc;
1105 
1106 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1107 		      port->trid.traddr, port->trid.trsvcid);
1108 
1109 	if (transport->opts.sock_priority) {
1110 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1111 		if (rc) {
1112 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1113 			spdk_sock_close(&sock);
1114 			return;
1115 		}
1116 	}
1117 
1118 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1119 	if (tqpair == NULL) {
1120 		SPDK_ERRLOG("Could not allocate new connection.\n");
1121 		spdk_sock_close(&sock);
1122 		return;
1123 	}
1124 
1125 	tqpair->sock = sock;
1126 	tqpair->max_queue_depth = 1;
1127 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1128 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1129 	tqpair->port = port;
1130 	tqpair->qpair.transport = transport;
1131 
1132 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1133 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1134 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1135 			       &tqpair->initiator_port);
1136 	if (rc < 0) {
1137 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1138 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1139 		return;
1140 	}
1141 
1142 	cb_fn(&tqpair->qpair, cb_arg);
1143 }
1144 
1145 static void
1146 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1147 			  new_qpair_fn cb_fn, void *cb_arg)
1148 {
1149 	struct spdk_sock *sock;
1150 	int i;
1151 
1152 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1153 		sock = spdk_sock_accept(port->listen_sock);
1154 		if (sock) {
1155 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn, cb_arg);
1156 		}
1157 	}
1158 }
1159 
1160 static void
1161 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn, void *cb_arg)
1162 {
1163 	struct spdk_nvmf_tcp_transport *ttransport;
1164 	struct spdk_nvmf_tcp_port *port;
1165 
1166 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1167 
1168 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1169 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn, cb_arg);
1170 	}
1171 }
1172 
1173 static void
1174 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1175 		       struct spdk_nvme_transport_id *trid,
1176 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1177 {
1178 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1179 	entry->adrfam = trid->adrfam;
1180 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
1181 
1182 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1183 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1184 
1185 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1186 }
1187 
1188 static struct spdk_nvmf_transport_poll_group *
1189 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1190 {
1191 	struct spdk_nvmf_tcp_poll_group *tgroup;
1192 
1193 	tgroup = calloc(1, sizeof(*tgroup));
1194 	if (!tgroup) {
1195 		return NULL;
1196 	}
1197 
1198 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1199 	if (!tgroup->sock_group) {
1200 		goto cleanup;
1201 	}
1202 
1203 	TAILQ_INIT(&tgroup->qpairs);
1204 
1205 	return &tgroup->group;
1206 
1207 cleanup:
1208 	free(tgroup);
1209 	return NULL;
1210 }
1211 
1212 static struct spdk_nvmf_transport_poll_group *
1213 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1214 {
1215 	struct spdk_nvmf_tcp_qpair *tqpair;
1216 	struct spdk_sock_group *group = NULL;
1217 	int rc;
1218 
1219 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1220 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1221 	if (!rc && group != NULL) {
1222 		return spdk_sock_group_get_ctx(group);
1223 	}
1224 
1225 	return NULL;
1226 }
1227 
1228 static void
1229 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1230 {
1231 	struct spdk_nvmf_tcp_poll_group *tgroup;
1232 
1233 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1234 	spdk_sock_group_close(&tgroup->sock_group);
1235 
1236 	free(tgroup);
1237 }
1238 
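/* Prepare pdu_in_progress for the next PDU: if there is no room left in the
 * receive buffer for another PDU header at the current offset, slide any
 * unprocessed bytes back to the start of the buffer and reset the offset,
 * then point the in-progress PDU header at the current read position.
 */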
1239 static inline void
1240 spdk_nvmf_tcp_reset_pdu_in_process(struct spdk_nvmf_tcp_qpair *tqpair)
1241 {
1242 	struct nvme_tcp_pdu_recv_buf *pdu_recv_buf = &tqpair->pdu_recv_buf;
1243 	char *dst, *src;
1244 
1245 	if (spdk_unlikely((pdu_recv_buf->off + sizeof(union nvme_tcp_pdu_hdr)) >
1246 			  pdu_recv_buf->size)) {
1247 		if (pdu_recv_buf->remain_size) {
1248 			dst = pdu_recv_buf->buf;
1249 			src = (char *)((void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1250 
1251 			/* Use memmove rather than memcpy because the source and destination regions may overlap */
1252 			memmove(dst, src, pdu_recv_buf->remain_size);
1253 		}
1254 		tqpair->pdu_recv_buf.off = 0;
1255 	} else if (!pdu_recv_buf->remain_size) {
1256 		tqpair->pdu_recv_buf.off = 0;
1257 	}
1258 
1259 	tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)((void *)pdu_recv_buf->buf +
1260 				      pdu_recv_buf->off);
1261 }
1262 
1263 static void
1264 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1265 				   enum nvme_tcp_pdu_recv_state state)
1266 {
1267 	if (tqpair->recv_state == state) {
1268 		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state(%d) being set\n",
1269 			    tqpair, state);
1270 		return;
1271 	}
1272 
1273 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1274 	tqpair->recv_state = state;
1275 
1276 	switch (state) {
1277 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1278 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1279 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1280 		break;
1281 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1282 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1283 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1284 		spdk_nvmf_tcp_reset_pdu_in_process(tqpair);
1285 		break;
1286 	default:
1287 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1288 		abort();
1289 		break;
1290 	}
1291 }
1292 
1293 static int
1294 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1295 {
1296 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1297 
1298 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1299 
1300 	SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair,
1301 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1302 
1303 	spdk_nvmf_tcp_qpair_disconnect(tqpair);
1304 	return 0;
1305 }
1306 
1307 static void
1308 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1309 {
1310 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1311 
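	/* Arm a timer that force-disconnects the qpair if the initiator does not
	 * close the connection within SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT seconds.
	 */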
1312 	if (!tqpair->timeout_poller) {
1313 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1314 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1315 	}
1316 }
1317 
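/* Build and queue a C2H termination request PDU: record the fatal error status
 * (and the offending field offset when applicable), copy up to
 * SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE bytes of the bad PDU's header as
 * error data, and move the qpair receive state machine to ERROR.
 */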
1318 static void
1319 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1320 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1321 {
1322 	struct nvme_tcp_pdu *rsp_pdu;
1323 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1324 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1325 	uint32_t copy_len;
1326 
1327 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1328 	if (!rsp_pdu) {
1329 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1330 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1331 		return;
1332 	}
1333 
1334 	c2h_term_req = &rsp_pdu->hdr->term_req;
1335 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1336 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1337 
1338 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1339 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1340 		DSET32(&c2h_term_req->fei, error_offset);
1341 	}
1342 
1343 	copy_len = pdu->hdr->common.hlen;
1344 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1345 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1346 	}
1347 
1348 	/* Copy the error info into the buffer */
1349 	memcpy((uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, pdu->hdr->raw, copy_len);
1350 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, copy_len);
1351 
1352 	/* The total PDU length also covers the copied header of the offending PDU */
1353 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1354 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1355 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1356 }
1357 
1358 static void
1359 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1360 				     struct spdk_nvmf_tcp_qpair *tqpair,
1361 				     struct nvme_tcp_pdu *pdu)
1362 {
1363 	struct spdk_nvmf_tcp_req *tcp_req;
1364 
1365 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1366 	if (!tcp_req) {
1367 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1368 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1369 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1370 		return;
1371 	}
1372 
1373 	pdu->req = tcp_req;
1374 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1375 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1376 	return;
1377 }
1378 
1379 static void
1380 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1381 		struct spdk_nvmf_tcp_qpair *tqpair,
1382 		struct nvme_tcp_pdu *pdu)
1383 {
1384 	struct spdk_nvmf_tcp_req *tcp_req;
1385 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1386 	uint32_t error_offset = 0;
1387 	enum spdk_nvme_tcp_term_req_fes fes;
1388 
1389 	capsule_cmd = &pdu->hdr->capsule_cmd;
1390 	tcp_req = pdu->req;
1391 	assert(tcp_req != NULL);
1392 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1393 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1394 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1395 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1396 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1397 		goto err;
1398 	}
1399 
1400 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1401 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1402 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1403 
1404 	return;
1405 err:
1406 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1407 }
1408 
1409 static void
1410 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1411 				  struct spdk_nvmf_tcp_qpair *tqpair,
1412 				  struct nvme_tcp_pdu *pdu)
1413 {
1414 	struct spdk_nvmf_tcp_req *tcp_req;
1415 	uint32_t error_offset = 0;
1416 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1417 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1418 	bool ttag_offset_error = false;
1419 
1420 	h2c_data = &pdu->hdr->h2c_data;
1421 
1422 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1423 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1424 
1425 	/* Use the information in the PDU to find the matching request */
1426 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1427 		      state_link) {
1428 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1429 			break;
1430 		}
1431 
1432 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1433 			ttag_offset_error = true;
1434 		}
1435 	}
1436 
1437 	if (!tcp_req) {
1438 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1439 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1440 		if (!ttag_offset_error) {
1441 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1442 		} else {
1443 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1444 		}
1445 		goto err;
1446 	}
1447 
1448 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1449 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1450 			      "tcp_req(%p), tqpair=%p,  expected_r2t_offset=%u, but data offset =%u\n",
1451 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1452 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1453 		goto err;
1454 	}
1455 
1456 	if (h2c_data->datal > tqpair->maxh2cdata) {
1457 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datal=%u exceeds maxh2cdata size=%u\n",
1458 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1459 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1460 		goto err;
1461 	}
1462 
1463 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1464 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1465 			      "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1466 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1467 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1468 		goto err;
1469 	}
1470 
1471 	pdu->req = tcp_req;
1472 
1473 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1474 		pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
1475 	}
1476 
1477 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1478 				  h2c_data->datao, h2c_data->datal);
1479 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1480 	return;
1481 
1482 err:
1483 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1484 }
1485 
1486 static void
1487 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1488 {
1489 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1490 	nvmf_tcp_request_free(tcp_req);
1491 }
1492 
1493 static void
1494 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1495 				    struct spdk_nvmf_tcp_qpair *tqpair)
1496 {
1497 	struct nvme_tcp_pdu *rsp_pdu;
1498 	struct spdk_nvme_tcp_rsp *capsule_resp;
1499 
1500 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1501 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1502 	if (!rsp_pdu) {
1503 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1504 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1505 		return;
1506 	}
1507 
1508 	capsule_resp = &rsp_pdu->hdr->capsule_resp;
1509 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1510 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1511 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1512 	if (tqpair->host_hdgst_enable) {
1513 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1514 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1515 	}
1516 
1517 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1518 }
1519 
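/* Called when a C2H data PDU has been fully written to the socket.  Once the
 * last data PDU for the request completes, either free the request (when the
 * C2H success optimization is enabled) or send an explicit capsule response,
 * then let any queued C2H data requests make progress.
 */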
1520 static void
1521 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1522 {
1523 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1524 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1525 					     struct spdk_nvmf_tcp_qpair, qpair);
1526 
1527 	assert(tqpair != NULL);
1528 	assert(tcp_req->c2h_data_pdu_num > 0);
1529 	tcp_req->c2h_data_pdu_num--;
1530 	if (!tcp_req->c2h_data_pdu_num) {
1531 		if (tqpair->qpair.transport->opts.c2h_success) {
1532 			nvmf_tcp_request_free(tcp_req);
1533 		} else {
1534 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1535 		}
1536 	}
1537 
1538 	tqpair->c2h_data_pdu_cnt--;
1539 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1540 }
1541 
1542 static void
1543 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1544 			   struct spdk_nvmf_tcp_req *tcp_req)
1545 {
1546 	struct nvme_tcp_pdu *rsp_pdu;
1547 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1548 
1549 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1550 	if (!rsp_pdu) {
1551 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1552 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1553 		return;
1554 	}
1555 
1556 	r2t = &rsp_pdu->hdr->r2t;
1557 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1558 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1559 
1560 	if (tqpair->host_hdgst_enable) {
1561 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1562 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1563 	}
1564 
1565 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1566 	r2t->ttag = tcp_req->ttag;
1567 	r2t->r2to = tcp_req->next_expected_r2t_offset;
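	/* Request as much of the remaining data as possible, capped at the
	 * qpair's maxh2cdata limit per R2T.
	 */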
1568 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1569 	tcp_req->r2tl_remain = r2t->r2tl;
1570 
1571 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1572 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1573 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1574 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1575 }
1576 
1577 static void
1578 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1579 				      struct spdk_nvmf_tcp_qpair *tqpair,
1580 				      struct nvme_tcp_pdu *pdu)
1581 {
1582 	struct spdk_nvmf_tcp_req *tcp_req;
1583 
1584 	tcp_req = pdu->req;
1585 	assert(tcp_req != NULL);
1586 
1587 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1588 
1589 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1590 	tcp_req->r2tl_remain -= pdu->data_len;
1591 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1592 
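	/* If this R2T transfer is finished, either start executing the command
	 * (all of the data has arrived) or issue another R2T for the rest.
	 */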
1593 	if (!tcp_req->r2tl_remain) {
1594 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1595 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1596 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1597 		} else {
1598 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1599 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1600 		}
1601 	}
1602 }
1603 
1604 static void
1605 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1606 {
1607 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1608 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1609 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1610 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1611 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1612 			      DGET32(h2c_term_req->fei));
1613 	}
1614 }
1615 
1616 static void
1617 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1618 				      struct nvme_tcp_pdu *pdu)
1619 {
1620 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1621 	uint32_t error_offset = 0;
1622 	enum spdk_nvme_tcp_term_req_fes fes;
1623 
1624 
1625 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1626 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1627 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1628 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1629 		goto end;
1630 	}
1631 
1632 	/* set the data buffer */
1633 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr->raw + h2c_term_req->common.hlen,
1634 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1635 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1636 	return;
1637 end:
1638 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1639 	return;
1640 }
1641 
1642 static void
1643 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1644 		struct nvme_tcp_pdu *pdu)
1645 {
1646 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1647 
1648 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1649 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1650 	return;
1651 }
1652 
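/*
 * Called once the full payload of the in-progress PDU has been read. Verify
 * the data digest if one was received, then dispatch to the payload handler
 * that matches the PDU type.
 */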
1653 static void
1654 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1655 {
1656 	int rc = 0;
1657 	struct nvme_tcp_pdu *pdu;
1658 	uint32_t crc32c, error_offset = 0;
1659 	enum spdk_nvme_tcp_term_req_fes fes;
1660 	struct spdk_nvmf_tcp_transport *ttransport;
1661 
1662 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1663 	pdu = &tqpair->pdu_in_progress;
1664 
1665 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1666 	/* Check the data digest if it is enabled for this PDU */
1667 	if (pdu->ddgst_enable) {
1668 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1669 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1670 		if (rc == 0) {
1671 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1672 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1673 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1674 			return;
1675 
1676 		}
1677 	}
1678 
1679 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1680 	switch (pdu->hdr->common.pdu_type) {
1681 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1682 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1683 		break;
1684 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1685 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1686 		break;
1687 
1688 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1689 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1690 		break;
1691 
1692 	default:
1693 		/* The code should never reach here */
1694 		SPDK_ERRLOG("The code should not reach here\n");
1695 		break;
1696 	}
1697 }
1698 
1699 static void
1700 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1701 {
1702 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1703 
1704 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1705 }
1706 
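/*
 * Process an ICReq PDU: validate the PDU format version, record the host's
 * header/data digest settings, clamp CPDA and MAXH2CDATA, and queue an ICResp.
 * The qpair transitions to RUNNING once the ICResp has been written.
 */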
1707 static void
1708 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1709 			   struct spdk_nvmf_tcp_qpair *tqpair,
1710 			   struct nvme_tcp_pdu *pdu)
1711 {
1712 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr->ic_req;
1713 	struct nvme_tcp_pdu *rsp_pdu;
1714 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1715 	uint32_t error_offset = 0;
1716 	enum spdk_nvme_tcp_term_req_fes fes;
1717 
1718 	/* Only PFV 0 is defined currently */
1719 	if (ic_req->pfv != 0) {
1720 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1721 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1722 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1723 		goto end;
1724 	}
1725 
1726 	/* MAXR2T is 0's based */
1727 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1728 
1729 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1730 	if (!tqpair->host_hdgst_enable) {
1731 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1732 	}
1733 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1734 	if (!tqpair->host_ddgst_enable) {
1735 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1736 	}
1737 
1738 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1739 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1740 
1741 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1742 	if (!rsp_pdu) {
1743 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1744 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1745 		return;
1746 	}
1747 
1748 	ic_resp = &rsp_pdu->hdr->ic_resp;
1749 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1750 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1751 	ic_resp->pfv = 0;
1752 	ic_resp->cpda = tqpair->cpda;
1753 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1754 				      ttransport->transport.opts.io_unit_size);
1755 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1756 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1757 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1758 
1759 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1760 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1761 
1762 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1763 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1764 	return;
1765 end:
1766 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1767 	return;
1768 }
1769 
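/*
 * Handle the PDU-specific header (PSH). Verify the header digest if present,
 * then dispatch to the per-type header handler; unknown PDU types trigger a
 * C2H termination request.
 */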
1770 static void
1771 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1772 {
1773 	struct nvme_tcp_pdu *pdu;
1774 	int rc;
1775 	uint32_t crc32c, error_offset = 0;
1776 	enum spdk_nvme_tcp_term_req_fes fes;
1777 	struct spdk_nvmf_tcp_transport *ttransport;
1778 
1779 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1780 	pdu = &tqpair->pdu_in_progress;
1781 
1782 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1783 		      pdu->hdr->common.pdu_type);
1784 	/* check header digest if needed */
1785 	if (pdu->has_hdgst) {
1786 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1787 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1788 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr->raw + pdu->hdr->common.hlen, crc32c);
1789 		if (rc == 0) {
1790 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1791 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1792 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1793 			return;
1794 
1795 		}
1796 	}
1797 
1798 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1799 	switch (pdu->hdr->common.pdu_type) {
1800 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1801 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1802 		break;
1803 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1804 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1805 		break;
1806 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1807 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1808 		break;
1809 
1810 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1811 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1812 		break;
1813 
1814 	default:
1815 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr->common.pdu_type);
1816 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1817 		error_offset = 1;
1818 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1819 		break;
1820 	}
1821 }
1822 
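/*
 * Validate the common header (CH) of the in-progress PDU: an ICReq is only
 * legal before negotiation completes, every other type only afterwards. Check
 * hlen, pdo, and plen against the expected values for the PDU type; on success,
 * compute the PSH length and wait for the PDU-specific header.
 */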
1823 static void
1824 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1825 {
1826 	struct nvme_tcp_pdu *pdu;
1827 	uint32_t error_offset = 0;
1828 	enum spdk_nvme_tcp_term_req_fes fes;
1829 	uint8_t expected_hlen, pdo;
1830 	bool plen_error = false, pdo_error = false;
1831 
1832 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1833 	pdu = &tqpair->pdu_in_progress;
1834 
1835 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1836 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1837 			SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
1838 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1839 			goto err;
1840 		}
1841 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1842 		if (pdu->hdr->common.plen != expected_hlen) {
1843 			plen_error = true;
1844 		}
1845 	} else {
1846 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1847 			SPDK_ERRLOG("The TCP/IP connection is not negotitated\n");
1848 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1849 			goto err;
1850 		}
1851 
1852 		switch (pdu->hdr->common.pdu_type) {
1853 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1854 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1855 			pdo = pdu->hdr->common.pdo;
1856 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1857 				pdo_error = true;
1858 				break;
1859 			}
1860 
1861 			if (pdu->hdr->common.plen < expected_hlen) {
1862 				plen_error = true;
1863 			}
1864 			break;
1865 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1866 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1867 			pdo = pdu->hdr->common.pdo;
1868 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1869 				pdo_error = true;
1870 				break;
1871 			}
1872 			if (pdu->hdr->common.plen < expected_hlen) {
1873 				plen_error = true;
1874 			}
1875 			break;
1876 
1877 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1878 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1879 			if ((pdu->hdr->common.plen <= expected_hlen) ||
1880 			    (pdu->hdr->common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1881 				plen_error = true;
1882 			}
1883 			break;
1884 
1885 		default:
1886 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr->common.pdu_type);
1887 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1888 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1889 			goto err;
1890 		}
1891 	}
1892 
1893 	if (pdu->hdr->common.hlen != expected_hlen) {
1894 		SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n",
1895 			    pdu->hdr->common.pdu_type,
1896 			    expected_hlen, pdu->hdr->common.hlen, tqpair);
1897 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1898 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1899 		goto err;
1900 	} else if (pdo_error) {
1901 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1902 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1903 	} else if (plen_error) {
1904 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1905 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1906 		goto err;
1907 	} else {
1908 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1909 		nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
1910 		return;
1911 	}
1912 err:
1913 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1914 }
1915 
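/*
 * Generate DIF metadata for the range of payload bytes that was just read into
 * the PDU's data iovecs.
 */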
1916 static int
1917 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1918 				int read_len)
1919 {
1920 	int rc;
1921 
1922 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1923 				      read_offset, read_len, pdu->dif_ctx);
1924 	if (rc != 0) {
1925 		SPDK_ERRLOG("DIF generate failed\n");
1926 	}
1927 
1928 	return rc;
1929 }
1930 
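/*
 * Fill the qpair's PDU receive buffer from the socket. A positive return value
 * is the number of bytes now available in the buffer; a negative value means
 * the socket will be disconnected.
 */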
1931 static int
1932 nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
1933 {
1934 	int rc;
1935 
1936 	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size - pdu_recv_buf->off,
1937 				(void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1938 	if (rc < 0) {
1939 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
1940 	} else if (rc > 0) {
1941 		pdu_recv_buf->remain_size = rc;
1942 		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1943 	}
1944 
1945 	return rc;
1946 }
1947 
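/*
 * Consume up to expected_size bytes from the PDU receive buffer, copying them
 * to dst when dst is non-NULL, and advance the buffer offset either way.
 */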
1948 static uint32_t
1949 nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1950 				     uint32_t expected_size,
1951 				     char *dst)
1952 {
1953 	uint32_t size;
1954 
1955 	assert(pdu_recv_buf->remain_size > 0);
1956 	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
1957 	if (dst) {
1958 		memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
1959 	}
1960 	pdu_recv_buf->off += size;
1961 	pdu_recv_buf->remain_size -= size;
1962 
1963 
1964 	return size;
1965 }
1966 
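/*
 * Copy as much of the PDU's payload (and data digest, if enabled) as the
 * receive buffer currently holds into the PDU's payload iovecs. H2C
 * termination request payloads are consumed without copying.
 */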
1967 static int
1968 nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1969 		struct nvme_tcp_pdu *pdu)
1970 {
1971 	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
1972 	int iovcnt, i;
1973 	uint32_t size = 0;
1974 	void *dst;
1975 
1976 	assert(pdu_recv_buf->remain_size > 0);
1977 	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
1978 					     pdu->ddgst_enable, NULL);
1979 	assert(iovcnt >= 0);
1980 	for (i = 0; i < iovcnt; i++) {
1981 		if (!pdu_recv_buf->remain_size) {
1982 			break;
1983 		}
1984 
1985 		dst = NULL;
1986 		if (pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) {
1987 			dst = iov[i].iov_base;
1988 		}
1989 		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, dst);
1990 	}
1991 
1992 	return size;
1993 }
1994 
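/*
 * Per-qpair receive state machine. Data is first pulled from the socket into
 * the PDU receive buffer and then parsed in stages: common header, PDU-specific
 * header, and finally the payload (plus optional data digest). The loop keeps
 * running as long as each pass changes the receive state.
 */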
1995 static int
1996 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
1997 {
1998 	int rc = 0;
1999 	struct nvme_tcp_pdu *pdu;
2000 	enum nvme_tcp_pdu_recv_state prev_state;
2001 	uint32_t data_len;
2002 
2003 	/* The loop here is to allow for several back-to-back state changes. */
2004 	do {
2005 		prev_state = tqpair->recv_state;
2006 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
2007 
2008 		pdu = &tqpair->pdu_in_progress;
2009 		switch (tqpair->recv_state) {
2010 		/* Wait for the common header */
2011 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
2012 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
2013 			if (!tqpair->pdu_recv_buf.remain_size) {
2014 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2015 				if (rc <= 0) {
2016 					return rc;
2017 				}
2018 			}
2019 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2020 					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
2021 					NULL);
2022 			pdu->ch_valid_bytes += rc;
2023 			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
2024 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2025 			}
2026 
2027 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
2028 				return NVME_TCP_PDU_IN_PROGRESS;
2029 			}
2030 
2031 			/* The common header of this PDU has now been read from the socket. */
2032 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
2033 			break;
2034 		/* Wait for the PDU-specific header */
2035 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2036 			if (!tqpair->pdu_recv_buf.remain_size) {
2037 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2038 				if (rc <= 0) {
2039 					return rc;
2040 				}
2041 			}
2042 
2043 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2044 					pdu->psh_len - pdu->psh_valid_bytes,
2045 					NULL);
2046 			pdu->psh_valid_bytes += rc;
2047 			if (pdu->psh_valid_bytes < pdu->psh_len) {
2048 				return NVME_TCP_PDU_IN_PROGRESS;
2049 			}
2050 
2051 			/* The entire header (CH, PSH, and header digest) of this PDU has now been read from the socket. */
2052 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
2053 			break;
2054 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2055 			/* If this PDU carries no payload data, there is nothing more to read */
2056 			if (!pdu->data_len) {
2057 				return NVME_TCP_PDU_IN_PROGRESS;
2058 			}
2059 
2060 			data_len = pdu->data_len;
2061 			/* data digest */
2062 			if (spdk_unlikely((pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2063 					  tqpair->host_ddgst_enable)) {
2064 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
2065 				pdu->ddgst_enable = true;
2066 			}
2067 
2068 			if (tqpair->pdu_recv_buf.remain_size) {
2069 				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
2070 				pdu->readv_offset += rc;
2071 			}
2072 
2073 			if (pdu->readv_offset < data_len) {
2074 				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2075 				if (rc < 0) {
2076 					return NVME_TCP_PDU_IN_PROGRESS;
2077 				}
2078 				pdu->readv_offset += rc;
2079 			}
2080 
2081 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
2082 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
2083 				if (rc != 0) {
2084 					return NVME_TCP_PDU_FATAL;
2085 				}
2086 			}
2087 
2088 			if (pdu->readv_offset < data_len) {
2089 				return NVME_TCP_PDU_IN_PROGRESS;
2090 			}
2091 
2092 			/* All of this PDU has now been read from the socket. */
2093 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2094 			break;
2095 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2096 			if (!spdk_sock_is_connected(tqpair->sock)) {
2097 				return NVME_TCP_PDU_FATAL;
2098 			}
2099 			break;
2100 		default:
2101 			assert(0);
2102 			SPDK_ERRLOG("code should not come to here");
2103 			break;
2104 		}
2105 	} while (tqpair->recv_state != prev_state);
2106 
2107 	return rc;
2108 }
2109 
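/*
 * Determine the data transfer direction of a request from its opcode and SGL.
 * Zero-length SGLs, and most Get/Set Features admin commands, are reported as
 * SPDK_NVME_DATA_NONE.
 */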
2110 static enum spdk_nvme_data_transfer
2111 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2112 	enum spdk_nvme_data_transfer xfer;
2113 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2114 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2115 
2116 	/* Figure out data transfer direction */
2117 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2118 	{
2119 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2120 	} else
2121 	{
2122 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2123 
2124 		/* Some admin commands are special cases */
2125 		if ((tcp_req->req.qpair->qid == 0) &&
2126 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2127 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2128 			switch (cmd->cdw10 & 0xff) {
2129 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2130 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2131 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2132 				break;
2133 			default:
2134 				xfer = SPDK_NVME_DATA_NONE;
2135 			}
2136 		}
2137 	}
2138 
2139 	if (xfer == SPDK_NVME_DATA_NONE)
2140 	{
2141 		return xfer;
2142 	}
2143 
2144 	/* Even for commands that may transfer data, they could have specified 0 length.
2145 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2146 	 */
2147 	switch (sgl->generic.type)
2148 	{
2149 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2150 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2151 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2152 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2153 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2154 		if (sgl->unkeyed.length == 0) {
2155 			xfer = SPDK_NVME_DATA_NONE;
2156 		}
2157 		break;
2158 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2159 		if (sgl->keyed.length == 0) {
2160 			xfer = SPDK_NVME_DATA_NONE;
2161 		}
2162 		break;
2163 	}
2164 
2165 	return xfer;
2166 }
2167 
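/*
 * Parse the command's SGL descriptor. Transport data block SGLs are backed by
 * buffers from the shared pool (the request stays queued if none are
 * available), while offset SGLs describe in-capsule data within the request's
 * own buffer.
 */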
2168 static int
2169 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
2170 			    struct spdk_nvmf_transport *transport,
2171 			    struct spdk_nvmf_transport_poll_group *group)
2172 {
2173 	struct spdk_nvmf_request		*req = &tcp_req->req;
2174 	struct spdk_nvme_cmd			*cmd;
2175 	struct spdk_nvme_cpl			*rsp;
2176 	struct spdk_nvme_sgl_descriptor		*sgl;
2177 	uint32_t				length;
2178 
2179 	cmd = &req->cmd->nvme_cmd;
2180 	rsp = &req->rsp->nvme_cpl;
2181 	sgl = &cmd->dptr.sgl1;
2182 
2183 	length = sgl->unkeyed.length;
2184 
2185 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2186 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2187 		if (length > transport->opts.max_io_size) {
2188 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2189 				    length, transport->opts.max_io_size);
2190 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2191 			return -1;
2192 		}
2193 
2194 		/* fill request length and populate iovs */
2195 		req->length = length;
2196 
2197 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2198 
2199 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2200 			req->dif.orig_length = length;
2201 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2202 			req->dif.elba_length = length;
2203 		}
2204 
2205 		if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
2206 			/* No available buffers. Queue this request up. */
2207 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2208 				      tcp_req);
2209 			return 0;
2210 		}
2211 
2212 		/* backward compatible */
2213 		req->data = req->iov[0].iov_base;
2214 
2215 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2216 			      tcp_req, req->iovcnt, req->data);
2217 
2218 		return 0;
2219 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2220 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2221 		uint64_t offset = sgl->address;
2222 		uint32_t max_len = transport->opts.in_capsule_data_size;
2223 
2224 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2225 			      offset, length);
2226 
2227 		if (offset > max_len) {
2228 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2229 				    offset, max_len);
2230 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2231 			return -1;
2232 		}
2233 		max_len -= (uint32_t)offset;
2234 
2235 		if (length > max_len) {
2236 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2237 				    length, max_len);
2238 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2239 			return -1;
2240 		}
2241 
2242 		req->data = tcp_req->buf + offset;
2243 		req->data_from_pool = false;
2244 		req->length = length;
2245 
2246 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2247 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2248 			req->dif.elba_length = length;
2249 		}
2250 
2251 		req->iov[0].iov_base = req->data;
2252 		req->iov[0].iov_len = length;
2253 		req->iovcnt = 1;
2254 
2255 		return 0;
2256 	}
2257 
2258 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2259 		    sgl->generic.type, sgl->generic.subtype);
2260 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2261 	return -1;
2262 }
2263 
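/* Map a DIF verification error type to the corresponding NVMe media error status code. */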
2264 static inline enum spdk_nvme_media_error_status_code
2265 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
2266 	enum spdk_nvme_media_error_status_code result;
2267 
2268 	switch (err_type)
2269 	{
2270 	case SPDK_DIF_REFTAG_ERROR:
2271 		result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
2272 		break;
2273 	case SPDK_DIF_APPTAG_ERROR:
2274 		result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
2275 		break;
2276 	case SPDK_DIF_GUARD_ERROR:
2277 		result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
2278 		break;
2279 	default:
2280 		SPDK_UNREACHABLE();
2281 		break;
2282 	}
2283 
2284 	return result;
2285 }
2286 
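/*
 * Build and send one C2H_DATA PDU for the request at the head of the queued
 * C2H list: fill in the data offset and length, apply CPDA padding, enable
 * digests as negotiated, and verify DIF on the outgoing data when
 * insert-or-strip is active. The LAST_PDU (and optional SUCCESS) flags are set
 * on the PDU that completes the transfer.
 */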
2287 static void
2288 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2289 			    struct spdk_nvmf_tcp_req *tcp_req)
2290 {
2291 	struct nvme_tcp_pdu *rsp_pdu;
2292 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2293 	uint32_t plen, pdo, alignment;
2294 	int rc;
2295 
2296 	assert(tcp_req == STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req));
2297 
2298 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2299 
2300 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2301 	assert(rsp_pdu != NULL);
2302 
2303 	c2h_data = &rsp_pdu->hdr->c2h_data;
2304 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2305 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2306 
2307 	if (tqpair->host_hdgst_enable) {
2308 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2309 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2310 	}
2311 
2312 	/* set the psh */
2313 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2314 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2315 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2316 	c2h_data->datao = tcp_req->c2h_data_offset;
2317 
2318 	/* set the padding */
2319 	rsp_pdu->padding_len = 0;
2320 	pdo = plen;
2321 	if (tqpair->cpda) {
2322 		alignment = (tqpair->cpda + 1) << 2;
2323 		if (alignment > plen) {
2324 			rsp_pdu->padding_len = alignment - plen;
2325 			pdo = plen = alignment;
2326 		}
2327 	}
2328 
2329 	c2h_data->common.pdo = pdo;
2330 	plen += c2h_data->datal;
2331 	if (tqpair->host_ddgst_enable) {
2332 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2333 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2334 	}
2335 
2336 	c2h_data->common.plen = plen;
2337 
2338 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2339 		rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
2340 	}
2341 
2342 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2343 				  c2h_data->datao, c2h_data->datal);
2344 
2345 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2346 		struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
2347 		struct spdk_dif_error err_blk = {};
2348 
2349 		rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
2350 					    0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
2351 		if (rc != 0) {
2352 			SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2353 				    err_blk.err_type, err_blk.err_offset);
2354 			rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
2355 			rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
2356 			STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2357 			spdk_nvmf_tcp_pdu_put(tqpair, rsp_pdu);
2358 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2359 			return;
2360 		}
2361 	}
2362 
2363 	tcp_req->c2h_data_offset += c2h_data->datal;
2364 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2365 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2366 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2367 		if (tqpair->qpair.transport->opts.c2h_success) {
2368 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2369 		}
2370 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2371 	}
2372 
2373 	tqpair->c2h_data_pdu_cnt += 1;
2374 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2375 }
2376 
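/* Number of C2H_DATA PDUs needed to transfer the request's full data length. */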
2377 static int
2378 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2379 {
2380 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2381 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2382 }
2383 
2384 static void
2385 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2386 {
2387 	struct spdk_nvmf_tcp_req *tcp_req;
2388 
2389 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2390 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2391 		tcp_req = STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2392 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2393 	}
2394 }
2395 
2396 static void
2397 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2398 			     struct spdk_nvmf_tcp_qpair *tqpair)
2399 {
2400 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2401 
2402 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2403 
2404 	STAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2405 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2406 }
2407 
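/*
 * Send the response for a completed request: advance sq_head, then either
 * queue C2H data (successful reads) or send the capsule response directly.
 */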
2408 static int
2409 request_transfer_out(struct spdk_nvmf_request *req)
2410 {
2411 	struct spdk_nvmf_tcp_req	*tcp_req;
2412 	struct spdk_nvmf_qpair		*qpair;
2413 	struct spdk_nvmf_tcp_qpair	*tqpair;
2414 	struct spdk_nvme_cpl		*rsp;
2415 
2416 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2417 
2418 	qpair = req->qpair;
2419 	rsp = &req->rsp->nvme_cpl;
2420 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2421 
2422 	/* Advance our sq_head pointer */
2423 	if (qpair->sq_head == qpair->sq_head_max) {
2424 		qpair->sq_head = 0;
2425 	} else {
2426 		qpair->sq_head++;
2427 	}
2428 	rsp->sqhd = qpair->sq_head;
2429 
2430 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2431 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2432 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2433 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2434 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2435 	} else {
2436 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2437 	}
2438 
2439 	return 0;
2440 }
2441 
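/*
 * Prepare to receive the host-to-controller data for a request. Data destined
 * for a pool buffer is solicited with an R2T; in-capsule data is read directly
 * into the request's iovecs as the rest of the capsule payload.
 */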
2442 static void
2443 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2444 				   struct spdk_nvmf_tcp_req *tcp_req)
2445 {
2446 	struct nvme_tcp_pdu *pdu;
2447 
2448 	if (tcp_req->req.data_from_pool) {
2449 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2450 		tcp_req->next_expected_r2t_offset = 0;
2451 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2452 	} else {
2453 		pdu = &tqpair->pdu_in_progress;
2454 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2455 			      tqpair);
2456 		/* No need to send r2t, contained in the capsuled data */
2457 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2458 					  0, tcp_req->req.length);
2459 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2460 	}
2461 }
2462 
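/*
 * Mark the request as carrying in-capsule data if the capsule PDU's plen
 * differs from its header length (plus header digest, when enabled).
 */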
2463 static void
2464 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2465 				 struct spdk_nvmf_tcp_req *tcp_req)
2466 {
2467 	struct nvme_tcp_pdu *pdu;
2468 	uint32_t plen = 0;
2469 
2470 	pdu = &tqpair->pdu_in_progress;
2471 	plen = pdu->hdr->common.hlen;
2472 
2473 	if (tqpair->host_hdgst_enable) {
2474 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2475 	}
2476 
2477 	if (pdu->hdr->common.plen != plen) {
2478 		tcp_req->has_incapsule_data = true;
2479 	}
2480 }
2481 
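/*
 * Drive a request through the TCP request state machine until it stops making
 * forward progress. Returns true if the request changed state at least once.
 */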
2482 static bool
2483 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2484 			  struct spdk_nvmf_tcp_req *tcp_req)
2485 {
2486 	struct spdk_nvmf_tcp_qpair		*tqpair;
2487 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2488 	int					rc;
2489 	enum spdk_nvmf_tcp_req_state		prev_state;
2490 	bool					progress = false;
2491 	struct spdk_nvmf_transport		*transport = &ttransport->transport;
2492 	struct spdk_nvmf_transport_poll_group	*group;
2493 
2494 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2495 	group = &tqpair->group->group;
2496 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2497 
2498 	/* The loop here is to allow for several back-to-back state changes. */
2499 	do {
2500 		prev_state = tcp_req->state;
2501 
2502 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2503 			      tqpair);
2504 
2505 		switch (tcp_req->state) {
2506 		case TCP_REQUEST_STATE_FREE:
2507 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2508 			 * to escape this state. */
2509 			break;
2510 		case TCP_REQUEST_STATE_NEW:
2511 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2512 
2513 			/* copy the cmd from the receive pdu */
2514 			tcp_req->cmd = tqpair->pdu_in_progress.hdr->capsule_cmd.ccsqe;
2515 
2516 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
2517 				tcp_req->req.dif.dif_insert_or_strip = true;
2518 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx;
2519 			}
2520 
2521 			/* The next state transition depends on the data transfer needs of this request. */
2522 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2523 
2524 			/* If no data to transfer, ready to execute. */
2525 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2526 				/* Reset the tqpair's receiving pdu state */
2527 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2528 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2529 				break;
2530 			}
2531 
2532 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2533 
2534 			if (!tcp_req->has_incapsule_data) {
2535 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2536 			}
2537 
2538 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2539 			STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
2540 			break;
2541 		case TCP_REQUEST_STATE_NEED_BUFFER:
2542 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2543 
2544 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2545 
2546 			if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
2547 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2548 					      "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
2549 					      tcp_req, tqpair);
2550 				/* This request needs to wait in line to obtain a buffer */
2551 				break;
2552 			}
2553 
2554 			/* Try to get a data buffer */
2555 			rc = spdk_nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
2556 			if (rc < 0) {
2557 				STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
2558 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2559 				/* Reset the tqpair's receiving pdu state */
2560 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2561 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2562 				break;
2563 			}
2564 
2565 			if (!tcp_req->req.data) {
2566 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)",
2567 					      tcp_req, tqpair);
2568 				/* No buffers available. */
2569 				break;
2570 			}
2571 
2572 			STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
2573 
2574 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2575 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2576 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2577 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2578 				break;
2579 			}
2580 
2581 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2582 			break;
2583 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2584 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2585 					  (uintptr_t)tcp_req, 0);
2586 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2587 			 * to escape this state. */
2588 			break;
2589 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2590 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2591 
2592 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2593 				assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
2594 				tcp_req->req.length = tcp_req->req.dif.elba_length;
2595 			}
2596 
2597 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2598 			spdk_nvmf_request_exec(&tcp_req->req);
2599 			break;
2600 		case TCP_REQUEST_STATE_EXECUTING:
2601 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2602 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2603 			 * to escape this state. */
2604 			break;
2605 		case TCP_REQUEST_STATE_EXECUTED:
2606 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2607 
2608 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2609 				tcp_req->req.length = tcp_req->req.dif.orig_length;
2610 			}
2611 
2612 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2613 			break;
2614 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2615 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2616 			rc = request_transfer_out(&tcp_req->req);
2617 			assert(rc == 0); /* No good way to handle this currently */
2618 			break;
2619 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2620 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2621 					  (uintptr_t)tcp_req,
2622 					  0);
2623 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2624 			 * to escape this state. */
2625 			break;
2626 		case TCP_REQUEST_STATE_COMPLETED:
2627 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2628 			if (tcp_req->req.data_from_pool) {
2629 				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
2630 			}
2631 			tcp_req->req.length = 0;
2632 			tcp_req->req.iovcnt = 0;
2633 			tcp_req->req.data = NULL;
2634 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2635 			break;
2636 		case TCP_REQUEST_NUM_STATES:
2637 		default:
2638 			assert(0);
2639 			break;
2640 		}
2641 
2642 		if (tcp_req->state != prev_state) {
2643 			progress = true;
2644 		}
2645 	} while (tcp_req->state != prev_state);
2646 
2647 	return progress;
2648 }
2649 
2650 static void
2651 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2652 {
2653 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2654 	int rc;
2655 
2656 	assert(tqpair != NULL);
2657 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2658 
2659 	/* Disconnect the qpair if either of the following is true:
2660 	 * rc < 0: the socket was closed or a fatal error occurred
2661 	 * tqpair state: the tqpair entered the EXITING state due to an internal error
2662 	 */
2663 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2664 		spdk_nvmf_tcp_qpair_disconnect(tqpair);
2665 	}
2666 }
2667 
2668 static int
2669 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2670 			     struct spdk_nvmf_qpair *qpair)
2671 {
2672 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2673 	struct spdk_nvmf_tcp_qpair	*tqpair;
2674 	int				rc;
2675 
2676 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2677 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2678 
2679 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2680 				      spdk_nvmf_tcp_sock_cb, tqpair);
2681 	if (rc != 0) {
2682 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2683 			    spdk_strerror(errno), errno);
2684 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2685 		return -1;
2686 	}
2687 
2688 	rc = spdk_nvmf_tcp_qpair_sock_init(tqpair);
2689 	if (rc != 0) {
2690 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2691 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2692 		return -1;
2693 	}
2694 
2695 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2696 	if (rc < 0) {
2697 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2698 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2699 		return -1;
2700 	}
2701 
2702 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2703 	if (rc < 0) {
2704 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2705 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2706 		return -1;
2707 	}
2708 
2709 	tqpair->group = tgroup;
2710 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2711 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2712 
2713 	return 0;
2714 }
2715 
2716 static int
2717 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2718 				struct spdk_nvmf_qpair *qpair)
2719 {
2720 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2721 	struct spdk_nvmf_tcp_qpair		*tqpair;
2722 	int				rc;
2723 
2724 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2725 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2726 
2727 	assert(tqpair->group == tgroup);
2728 
2729 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2730 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2731 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2732 	if (rc != 0) {
2733 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2734 			    spdk_strerror(errno), errno);
2735 	}
2736 
2737 	return rc;
2738 }
2739 
2740 static int
2741 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2742 {
2743 	struct spdk_nvmf_tcp_transport *ttransport;
2744 	struct spdk_nvmf_tcp_req *tcp_req;
2745 
2746 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2747 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2748 
2749 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2750 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2751 
2752 	return 0;
2753 }
2754 
2755 static void
2756 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2757 {
2758 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2759 
2760 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2761 }
2762 
2763 static int
2764 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2765 {
2766 	struct spdk_nvmf_tcp_poll_group *tgroup;
2767 	int rc;
2768 	struct spdk_nvmf_request *req, *req_tmp;
2769 	struct spdk_nvmf_tcp_req *tcp_req;
2770 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2771 			struct spdk_nvmf_tcp_transport, transport);
2772 
2773 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2774 
2775 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2776 		return 0;
2777 	}
2778 
2779 	STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
2780 		tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2781 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2782 			break;
2783 		}
2784 	}
2785 
2786 	rc = spdk_sock_group_poll(tgroup->sock_group);
2787 	if (rc < 0) {
2788 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2789 	}
2790 
2791 	return rc;
2792 }
2793 
2794 static int
2795 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2796 			     struct spdk_nvme_transport_id *trid, bool peer)
2797 {
2798 	struct spdk_nvmf_tcp_qpair     *tqpair;
2799 	uint16_t			port;
2800 
2801 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2802 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2803 
2804 	if (peer) {
2805 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2806 		port = tqpair->initiator_port;
2807 	} else {
2808 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2809 		port = tqpair->target_port;
2810 	}
2811 
2812 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2813 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2814 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2815 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2816 	} else {
2817 		return -1;
2818 	}
2819 
2820 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2821 	return 0;
2822 }
2823 
2824 static int
2825 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2826 				   struct spdk_nvme_transport_id *trid)
2827 {
2828 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2829 }
2830 
2831 static int
2832 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2833 				  struct spdk_nvme_transport_id *trid)
2834 {
2835 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2836 }
2837 
2838 static int
2839 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2840 				    struct spdk_nvme_transport_id *trid)
2841 {
2842 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2843 }
2844 
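/*
 * Grow the qpair's resources to match the negotiated submission queue size by
 * allocating additional requests and PDUs on top of the single slot created at
 * connect time.
 */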
2845 static int
2846 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2847 {
2848 	struct spdk_nvmf_tcp_qpair     *tqpair;
2849 	int rc;
2850 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2851 
2852 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2853 	if (!rc) {
2854 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2855 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2856 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2857 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2858 			      tqpair->max_queue_depth, tqpair);
2859 	}
2860 
2861 	return rc;
2862 
2863 }
2864 
2865 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2866 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2867 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2868 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2869 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2870 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2871 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2872 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2873 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2874 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2875 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2876 
2877 static void
2878 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2879 {
2880 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2881 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2882 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2883 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2884 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2885 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2886 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2887 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2888 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2889 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2890 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2891 }
2892 
2893 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2894 	.type = SPDK_NVME_TRANSPORT_TCP,
2895 	.opts_init = spdk_nvmf_tcp_opts_init,
2896 	.create = spdk_nvmf_tcp_create,
2897 	.destroy = spdk_nvmf_tcp_destroy,
2898 
2899 	.listen = spdk_nvmf_tcp_listen,
2900 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2901 	.accept = spdk_nvmf_tcp_accept,
2902 
2903 	.listener_discover = spdk_nvmf_tcp_discover,
2904 
2905 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2906 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2907 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2908 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2909 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2910 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2911 
2912 	.req_free = spdk_nvmf_tcp_req_free,
2913 	.req_complete = spdk_nvmf_tcp_req_complete,
2914 
2915 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2916 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2917 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2918 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2919 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2920 };
2921 
2922 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2923