xref: /spdk/lib/nvmf/tcp.c (revision 7ed0ec6832d9a5cf49bdc35f8e9c00fa80a5f67b)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/assert.h"
50 #include "spdk_internal/log.h"
51 #include "spdk_internal/nvme_tcp.h"
52 
53 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
54 
55 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
56 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
57 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data PDUs for each tqpair */
58 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
59 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
60 
61 /* Request state machine for the SPDK NVMe-oF TCP transport */
62 enum spdk_nvmf_tcp_req_state {
63 
64 	/* The request is not currently in use */
65 	TCP_REQUEST_STATE_FREE = 0,
66 
67 	/* Initial state when request first received */
68 	TCP_REQUEST_STATE_NEW,
69 
70 	/* The request is queued until a data buffer is available. */
71 	TCP_REQUEST_STATE_NEED_BUFFER,
72 
73 	/* The request is currently transferring data from the host to the controller. */
74 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
75 
76 	/* The request is ready to execute at the block device */
77 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
78 
79 	/* The request is currently executing at the block device */
80 	TCP_REQUEST_STATE_EXECUTING,
81 
82 	/* The request finished executing at the block device */
83 	TCP_REQUEST_STATE_EXECUTED,
84 
85 	/* The request is ready to send a completion */
86 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
87 
88 	/* The request is currently transferring final pdus from the controller to the host. */
89 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
90 
91 	/* The request completed and can be marked free. */
92 	TCP_REQUEST_STATE_COMPLETED,
93 
94 	/* Terminator */
95 	TCP_REQUEST_NUM_STATES,
96 };
97 
98 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
99 	"Invalid PDU Header Field",
100 	"PDU Sequence Error",
101 	"Header Digest Error",
102 	"Data Transfer Out of Range",
103 	"R2T Limit Exceeded",
104 	"Unsupported Parameter",
105 };
106 
107 #define OBJECT_NVMF_TCP_IO				0x80
108 
109 #define TRACE_GROUP_NVMF_TCP				0x5
110 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
111 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
112 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
113 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
114 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
115 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
116 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
117 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
118 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
119 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
120 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
121 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
122 
123 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
124 {
125 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
126 	spdk_trace_register_description("TCP_REQ_NEW",
127 					TRACE_TCP_REQUEST_STATE_NEW,
128 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
129 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
130 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
131 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
132 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
133 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
134 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
135 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
136 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
137 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
138 	spdk_trace_register_description("TCP_REQ_EXECUTING",
139 					TRACE_TCP_REQUEST_STATE_EXECUTING,
140 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
141 	spdk_trace_register_description("TCP_REQ_EXECUTED",
142 					TRACE_TCP_REQUEST_STATE_EXECUTED,
143 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
144 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
145 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
146 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
147 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
148 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
149 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
150 	spdk_trace_register_description("TCP_REQ_COMPLETED",
151 					TRACE_TCP_REQUEST_STATE_COMPLETED,
152 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
153 	spdk_trace_register_description("TCP_WRITE_START",
154 					TRACE_TCP_FLUSH_WRITEBUF_START,
155 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
156 	spdk_trace_register_description("TCP_WRITE_DONE",
157 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
158 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
159 	spdk_trace_register_description("TCP_READ_DONE",
160 					TRACE_TCP_READ_FROM_SOCKET_DONE,
161 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
162 }
163 
164 struct spdk_nvmf_tcp_req  {
165 	struct spdk_nvmf_request		req;
166 	struct spdk_nvme_cpl			rsp;
167 	struct spdk_nvme_cmd			cmd;
168 
169 	/* In-capsule data buffer */
170 	uint8_t					*buf;
171 
172 	bool					has_incapsule_data;
173 
174 	/* transfer_tag */
175 	uint16_t				ttag;
176 
177 	enum spdk_nvmf_tcp_req_state		state;
178 
179 	/*
180 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
181 	 */
182 	uint32_t				next_expected_r2t_offset;
183 	uint32_t				r2tl_remain;
184 
185 	/*
186 	 * c2h_data_offset is used when we send the c2h_data PDU.
187 	 */
188 	uint32_t				c2h_data_offset;
189 	uint32_t				c2h_data_pdu_num;
190 
191 	struct spdk_dif_ctx			dif_ctx;
192 	bool					dif_insert_or_strip;
193 	uint32_t				elba_length;
194 	uint32_t				orig_length;
195 
196 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
197 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
198 };
199 
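/* Contiguous buffer used to receive and reassemble incoming PDU bytes;
 * 'off' and 'remain_size' track the current parsing position within it.
 */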
200 struct nvme_tcp_pdu_recv_buf {
201 	char					*buf;
202 	uint32_t				off;
203 	uint32_t				size;
204 	uint32_t				remain_size;
205 };
206 
207 struct spdk_nvmf_tcp_qpair {
208 	struct spdk_nvmf_qpair			qpair;
209 	struct spdk_nvmf_tcp_poll_group		*group;
210 	struct spdk_nvmf_tcp_port		*port;
211 	struct spdk_sock			*sock;
212 	struct spdk_poller			*flush_poller;
213 
214 	enum nvme_tcp_pdu_recv_state		recv_state;
215 	enum nvme_tcp_qpair_state		state;
216 
217 	struct nvme_tcp_pdu			pdu_in_progress;
218 	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
219 
220 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
221 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
222 
223 	struct nvme_tcp_pdu			*pdu;
224 	struct nvme_tcp_pdu			*pdu_pool;
225 	uint16_t				free_pdu_num;
226 
227 	/* Queues to track the requests in all states */
228 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
229 	/* Number of requests in each state */
230 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
231 
232 	STAILQ_HEAD(, spdk_nvmf_tcp_req)	queued_c2h_data_tcp_req;
233 
234 	uint8_t					cpda;
235 
236 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
237 	 * buffers to be used for in capsule data.
238 	 */
239 	void					*buf;
240 	void					*bufs;
241 	struct spdk_nvmf_tcp_req		*req;
242 	struct spdk_nvmf_tcp_req		*reqs;
243 
244 	bool					host_hdgst_enable;
245 	bool					host_ddgst_enable;
246 
247 
248 	/* The maximum number of I/O outstanding on this connection at one time */
249 	uint16_t				max_queue_depth;
250 
251 
252 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
253 	uint32_t				maxh2cdata;
254 
255 	uint32_t				c2h_data_pdu_cnt;
256 
257 	/* IP address */
258 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
259 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
260 
261 	/* IP port */
262 	uint16_t				initiator_port;
263 	uint16_t				target_port;
264 
265 	/* Timer used to destroy the qpair after a transport error if the initiator does
266 	 *  not close the connection.
267 	 */
268 	struct spdk_poller			*timeout_poller;
269 
270 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
271 };
272 
273 struct spdk_nvmf_tcp_poll_group {
274 	struct spdk_nvmf_transport_poll_group	group;
275 	struct spdk_sock_group			*sock_group;
276 
277 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
278 };
279 
280 struct spdk_nvmf_tcp_port {
281 	struct spdk_nvme_transport_id		trid;
282 	struct spdk_sock			*listen_sock;
283 	uint32_t				ref;
284 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
285 };
286 
287 struct spdk_nvmf_tcp_transport {
288 	struct spdk_nvmf_transport		transport;
289 
290 	pthread_mutex_t				lock;
291 
292 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
293 };
294 
295 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
296 				      struct spdk_nvmf_tcp_req *tcp_req);
297 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
298 
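/* Move a request to a new state: remove it from the queue and counter for its
 * current state and append it to those of the new state.
 */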
299 static void
300 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
301 			    enum spdk_nvmf_tcp_req_state state)
302 {
303 	struct spdk_nvmf_qpair *qpair;
304 	struct spdk_nvmf_tcp_qpair *tqpair;
305 
306 	qpair = tcp_req->req.qpair;
307 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
308 
309 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
310 	tqpair->state_cntr[tcp_req->state]--;
311 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
312 
313 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
314 	tqpair->state_cntr[state]++;
315 
316 	tcp_req->state = state;
317 }
318 
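/* Take a PDU from the qpair's free list and reset it for reuse. */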
319 static struct nvme_tcp_pdu *
320 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
321 {
322 	struct nvme_tcp_pdu *pdu;
323 
324 	pdu = TAILQ_FIRST(&tqpair->free_queue);
325 	if (!pdu) {
326 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
327 		abort();
328 		return NULL;
329 	}
330 
331 	tqpair->free_pdu_num--;
332 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
333 	memset(pdu, 0, sizeof(*pdu));
334 	pdu->ref = 1;
335 	pdu->hdr = &pdu->hdr_mem;
336 
337 	return pdu;
338 }
339 
340 static void
341 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
342 {
343 	if (!pdu) {
344 		return;
345 	}
346 
347 	assert(pdu->ref > 0);
348 
349 	pdu->ref--;
350 	if (pdu->ref == 0) {
351 		tqpair->free_pdu_num++;
352 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
353 	}
354 }
355 
356 static struct spdk_nvmf_tcp_req *
357 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
358 {
359 	struct spdk_nvmf_tcp_req *tcp_req;
360 
361 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
362 	if (!tcp_req) {
363 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
364 		return NULL;
365 	}
366 
367 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
368 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
369 	tcp_req->next_expected_r2t_offset = 0;
370 	tcp_req->r2tl_remain = 0;
371 	tcp_req->c2h_data_offset = 0;
372 	tcp_req->has_incapsule_data = false;
373 	tcp_req->dif_insert_or_strip = false;
374 
375 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
376 	return tcp_req;
377 }
378 
379 static void
380 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
381 {
382 	struct spdk_nvmf_tcp_transport *ttransport;
383 
384 	if (!tcp_req) {
385 		return;
386 	}
387 
388 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
389 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
390 				      struct spdk_nvmf_tcp_transport, transport);
391 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
392 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
393 }
394 
395 static int
396 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
397 {
398 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
399 
400 	nvmf_tcp_request_free(tcp_req);
401 
402 	return 0;
403 }
404 
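/* Free every request currently queued in the given state. */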
405 static void
406 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
407 				enum spdk_nvmf_tcp_req_state state)
408 {
409 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
410 
411 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
412 		nvmf_tcp_request_free(tcp_req);
413 	}
414 }
415 
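/* Drop all queued PDUs and free the requests still outstanding on this qpair
 * so it can be torn down cleanly.
 */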
416 static void
417 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
418 {
419 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
420 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
421 
422 	/* Free the pdus in the send_queue */
423 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
424 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
425 		/* Also check the pdu type; we need to keep the c2h_data_pdu_cnt accounting accurate */
426 		if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
427 			assert(tqpair->c2h_data_pdu_cnt > 0);
428 			tqpair->c2h_data_pdu_cnt--;
429 		}
430 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
431 	}
432 
433 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req)) {
434 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
435 	}
436 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
437 
438 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
439 
440 	/* Remove the requests waiting for a buffer from the poll group's pending queue */
441 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
442 			   req_tmp) {
443 		STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
444 			      spdk_nvmf_request, buf_link);
445 	}
446 
447 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
448 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
449 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
450 }
451 
452 static void
453 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
454 {
455 	int i;
456 	struct spdk_nvmf_tcp_req *tcp_req;
457 
458 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
459 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
460 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
461 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
462 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
463 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
464 		}
465 	}
466 }
467 
468 static void
469 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
470 {
471 	int err = 0;
472 
473 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
474 
475 	spdk_poller_unregister(&tqpair->flush_poller);
476 	spdk_sock_close(&tqpair->sock);
477 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
478 
479 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
480 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
481 			    tqpair->free_pdu_num,
482 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
483 		err++;
484 	}
485 
486 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
487 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
488 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
489 			    tqpair->max_queue_depth);
490 		err++;
491 	}
492 
493 	if (tqpair->c2h_data_pdu_cnt != 0) {
494 		SPDK_ERRLOG("tqpair(%p) outstanding c2h_data_pdu cnt is %u but should be 0\n", tqpair,
495 			    tqpair->c2h_data_pdu_cnt);
496 		err++;
497 	}
498 
499 	if (err > 0) {
500 		nvmf_tcp_dump_qpair_req_contents(tqpair);
501 	}
502 	free(tqpair->pdu);
503 	free(tqpair->pdu_pool);
504 	free(tqpair->req);
505 	free(tqpair->reqs);
506 	spdk_free(tqpair->buf);
507 	spdk_free(tqpair->bufs);
508 	free(tqpair->pdu_recv_buf.buf);
509 	free(tqpair);
510 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
511 }
512 
513 static int
514 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
515 {
516 	struct spdk_nvmf_tcp_transport	*ttransport;
517 
518 	assert(transport != NULL);
519 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
520 
521 	pthread_mutex_destroy(&ttransport->lock);
522 	free(ttransport);
523 	return 0;
524 }
525 
526 static struct spdk_nvmf_transport *
527 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
528 {
529 	struct spdk_nvmf_tcp_transport *ttransport;
530 	uint32_t sge_count;
531 	uint32_t min_shared_buffers;
532 
533 	ttransport = calloc(1, sizeof(*ttransport));
534 	if (!ttransport) {
535 		return NULL;
536 	}
537 
538 	TAILQ_INIT(&ttransport->ports);
539 
540 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
541 
542 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
543 
544 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
545 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
546 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
547 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
548 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
549 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
550 		     opts->max_queue_depth,
551 		     opts->max_io_size,
552 		     opts->max_qpairs_per_ctrlr,
553 		     opts->io_unit_size,
554 		     opts->in_capsule_data_size,
555 		     opts->max_aq_depth,
556 		     opts->num_shared_buffers,
557 		     opts->c2h_success,
558 		     opts->dif_insert_or_strip,
559 		     opts->sock_priority);
560 
561 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
562 		SPDK_ERRLOG("Unsupported socket_priority=%d, the valid range is 0 to %d\n"
563 			    "(see the SO_PRIORITY section in 'man 7 socket' for details)\n",
564 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
565 		free(ttransport);
566 		return NULL;
567 	}
568 
569 	/* I/O unit size cannot be larger than max I/O size */
570 	if (opts->io_unit_size > opts->max_io_size) {
571 		opts->io_unit_size = opts->max_io_size;
572 	}
573 
574 	sge_count = opts->max_io_size / opts->io_unit_size;
575 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
576 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
577 		free(ttransport);
578 		return NULL;
579 	}
580 
581 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
582 	if (min_shared_buffers > opts->num_shared_buffers) {
583 		SPDK_ERRLOG("There are not enough buffers to satisfy "
584 			    "per-poll group caches for each thread: %" PRIu32 " supplied, "
585 			    "%" PRIu32 " required\n", opts->num_shared_buffers, min_shared_buffers);
586 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
587 		spdk_nvmf_tcp_destroy(&ttransport->transport);
588 		return NULL;
589 	}
590 
591 	pthread_mutex_init(&ttransport->lock, NULL);
592 
593 	return &ttransport->transport;
594 }
595 
596 static int
597 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
598 {
599 	unsigned long long ull;
600 	char *end = NULL;
601 
602 	ull = strtoull(trsvcid, &end, 10);
603 	if (end == NULL || end == trsvcid || *end != '\0') {
604 		return -1;
605 	}
606 
607 	/* Valid TCP/IP port numbers are in [0, 65535] */
608 	if (ull > 65535) {
609 		return -1;
610 	}
611 
612 	return (int)ull;
613 }
614 
615 /**
616  * Canonicalize a listen address trid.
617  */
618 static int
619 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
620 				 const struct spdk_nvme_transport_id *trid)
621 {
622 	int trsvcid_int;
623 
624 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
625 	if (trsvcid_int < 0) {
626 		return -EINVAL;
627 	}
628 
629 	memset(canon_trid, 0, sizeof(*canon_trid));
630 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
631 	canon_trid->adrfam = trid->adrfam;
632 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
633 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
634 
635 	return 0;
636 }
637 
638 /**
639  * Find an existing listening port.
640  *
641  * Caller must hold ttransport->lock.
642  */
643 static struct spdk_nvmf_tcp_port *
644 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
645 			 const struct spdk_nvme_transport_id *trid)
646 {
647 	struct spdk_nvme_transport_id canon_trid;
648 	struct spdk_nvmf_tcp_port *port;
649 
650 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
651 		return NULL;
652 	}
653 
654 	TAILQ_FOREACH(port, &ttransport->ports, link) {
655 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
656 			return port;
657 		}
658 	}
659 
660 	return NULL;
661 }
662 
663 static int
664 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
665 		     const struct spdk_nvme_transport_id *trid)
666 {
667 	struct spdk_nvmf_tcp_transport *ttransport;
668 	struct spdk_nvmf_tcp_port *port;
669 	int trsvcid_int;
670 	uint8_t adrfam;
671 
672 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
673 
674 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
675 	if (trsvcid_int < 0) {
676 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
677 		return -EINVAL;
678 	}
679 
680 	pthread_mutex_lock(&ttransport->lock);
681 
682 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
683 	if (port) {
684 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
685 			      trid->traddr, trid->trsvcid);
686 		port->ref++;
687 		pthread_mutex_unlock(&ttransport->lock);
688 		return 0;
689 	}
690 
691 	port = calloc(1, sizeof(*port));
692 	if (!port) {
693 		SPDK_ERRLOG("Port allocation failed\n");
694 		free(port);
695 		pthread_mutex_unlock(&ttransport->lock);
696 		return -ENOMEM;
697 	}
698 
699 	port->ref = 1;
700 
701 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
702 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
703 			    trid->traddr, trid->trsvcid);
704 		free(port);
705 		pthread_mutex_unlock(&ttransport->lock);
706 		return -ENOMEM;
707 	}
708 
709 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
710 	if (port->listen_sock == NULL) {
711 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
712 			    trid->traddr, trsvcid_int,
713 			    spdk_strerror(errno), errno);
714 		free(port);
715 		pthread_mutex_unlock(&ttransport->lock);
716 		return -errno;
717 	}
718 
719 	if (spdk_sock_is_ipv4(port->listen_sock)) {
720 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
721 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
722 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
723 	} else {
724 		SPDK_ERRLOG("Unhandled socket type\n");
725 		adrfam = 0;
726 	}
727 
728 	if (adrfam != trid->adrfam) {
729 		SPDK_ERRLOG("Socket address family mismatch\n");
730 		spdk_sock_close(&port->listen_sock);
731 		free(port);
732 		pthread_mutex_unlock(&ttransport->lock);
733 		return -EINVAL;
734 	}
735 
736 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
737 		       trid->traddr, trsvcid_int);
738 
739 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
740 	pthread_mutex_unlock(&ttransport->lock);
741 
742 	return 0;
743 }
744 
745 static int
746 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
747 			  const struct spdk_nvme_transport_id *trid)
748 {
749 	struct spdk_nvmf_tcp_transport *ttransport;
750 	struct spdk_nvmf_tcp_port *port;
751 	int rc;
752 
753 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
754 
755 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
756 		      trid->traddr, trid->trsvcid);
757 
758 	pthread_mutex_lock(&ttransport->lock);
759 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
760 	if (port) {
761 		assert(port->ref > 0);
762 		port->ref--;
763 		if (port->ref == 0) {
764 			TAILQ_REMOVE(&ttransport->ports, port, link);
765 			spdk_sock_close(&port->listen_sock);
766 			free(port);
767 		}
768 		rc = 0;
769 	} else {
770 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
771 		rc = -ENOENT;
772 	}
773 	pthread_mutex_unlock(&ttransport->lock);
774 
775 	return rc;
776 }
777 
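/* Build an iovec array covering the PDUs at the head of the send_queue, write
 * it to the socket with a single writev(), and complete the PDUs that were
 * fully written.  Returns 0 when the queue is empty, 1 when data remains
 * queued, and -1 on a fatal socket error.
 */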
778 static int
779 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
780 {
781 	const int array_size = 32;
782 	struct iovec iovs[array_size];
783 	int iovcnt = 0;
784 	int bytes = 0;
785 	int total_length = 0;
786 	uint32_t mapped_length = 0;
787 	struct nvme_tcp_pdu *pdu;
788 	int pdu_length;
789 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
790 
791 	pdu = TAILQ_FIRST(&tqpair->send_queue);
792 
793 	if (pdu == NULL) {
794 		return 0;
795 	}
796 
797 	/*
798 	 * Build up a list of iovecs for the first few PDUs in the
799 	 *  tqpair's send_queue.
800 	 */
801 	while (pdu != NULL && ((array_size - iovcnt) >= (2 + (int)pdu->data_iovcnt))) {
802 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
803 					      array_size - iovcnt,
804 					      pdu,
805 					      tqpair->host_hdgst_enable,
806 					      tqpair->host_ddgst_enable,
807 					      &mapped_length);
808 		total_length += mapped_length;
809 		pdu = TAILQ_NEXT(pdu, tailq);
810 	}
811 
812 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
813 
814 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
815 	if (bytes == -1) {
816 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
817 			return 1;
818 		} else {
819 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
820 				    errno, spdk_strerror(errno));
821 			return -1;
822 		}
823 	}
824 
825 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
826 
827 	pdu = TAILQ_FIRST(&tqpair->send_queue);
828 
829 	/*
830 	 * Free any PDUs that were fully written.  If a PDU was only
831 	 *  partially written, update its writev_offset so that next
832 	 *  time only the unwritten portion will be sent to writev().
833 	 */
834 	TAILQ_INIT(&completed_pdus_list);
835 	while (bytes > 0) {
836 		pdu_length = pdu->hdr->common.plen - pdu->writev_offset;
837 		if (bytes >= pdu_length) {
838 			bytes -= pdu_length;
839 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
840 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
841 			pdu = TAILQ_FIRST(&tqpair->send_queue);
842 
843 		} else {
844 			pdu->writev_offset += bytes;
845 			bytes = 0;
846 		}
847 	}
848 
849 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
850 		pdu = TAILQ_FIRST(&completed_pdus_list);
851 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
852 		assert(pdu->cb_fn != NULL);
853 		pdu->cb_fn(pdu->cb_arg);
854 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
855 	}
856 
857 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
858 }
859 
860 static int
861 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
862 {
863 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
864 	int rc;
865 
866 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
867 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
868 		if (rc == 0 && tqpair->flush_poller != NULL) {
869 			spdk_poller_unregister(&tqpair->flush_poller);
870 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
871 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
872 					       tqpair, 50);
873 		}
874 	} else {
875 		/*
876 		 * If the tqpair state is not RUNNING, then
877 		 * keep trying to flush PDUs until our list is
878 		 * empty - to make sure all data is sent before
879 		 * closing the connection.
880 		 */
881 		do {
882 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
883 		} while (rc == 1);
884 	}
885 
886 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
887 		/*
888 		 * If the poller has already started destruction of the tqpair,
889 		 *  i.e. the socket read failed, then the connection state may already
890 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
891 		 */
892 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
893 	}
894 
895 	return -1;
896 }
897 
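/* Fill in the header and data digests (when enabled and applicable to this
 * PDU type), queue the PDU on the send_queue, and try to flush it immediately.
 */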
898 static void
899 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
900 			      struct nvme_tcp_pdu *pdu,
901 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
902 			      void *cb_arg)
903 {
904 	int enable_digest;
905 	int hlen;
906 	uint32_t crc32c;
907 
908 	hlen = pdu->hdr->common.hlen;
909 	enable_digest = 1;
910 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
911 	    pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
912 		/* this PDU should be sent without digest */
913 		enable_digest = 0;
914 	}
915 
916 	/* Header Digest */
917 	if (enable_digest && tqpair->host_hdgst_enable) {
918 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
919 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr->raw + hlen, crc32c);
920 	}
921 
922 	/* Data Digest */
923 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
924 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
925 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
926 	}
927 
928 	pdu->cb_fn = cb_fn;
929 	pdu->cb_arg = cb_arg;
930 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
931 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
932 }
933 
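/* Allocate the per-qpair request objects, in-capsule data buffers, PDU pool
 * and PDU receive buffer.  While sq_head_max is still zero only a single
 * request is prepared; otherwise 'size' requests are set up.
 */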
934 static int
935 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
936 {
937 	int i;
938 	struct spdk_nvmf_tcp_req *tcp_req;
939 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
940 	uint32_t in_capsule_data_size;
941 
942 	in_capsule_data_size = transport->opts.in_capsule_data_size;
943 	if (transport->opts.dif_insert_or_strip) {
944 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
945 	}
946 
947 	if (!tqpair->qpair.sq_head_max) {
948 		tqpair->req = calloc(1, sizeof(*tqpair->req));
949 		if (!tqpair->req) {
950 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
951 			return -1;
952 		}
953 
954 		if (in_capsule_data_size) {
955 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
956 						   NULL, SPDK_ENV_LCORE_ID_ANY,
957 						   SPDK_MALLOC_DMA);
958 			if (!tqpair->buf) {
959 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
960 				return -1;
961 			}
962 		}
963 
964 		tcp_req = tqpair->req;
965 		tcp_req->ttag = 0;
966 		tcp_req->req.qpair = &tqpair->qpair;
967 
968 		/* Set up memory to receive commands */
969 		if (tqpair->buf) {
970 			tcp_req->buf = tqpair->buf;
971 		}
972 
973 		/* Set the cmd and rsp */
974 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
975 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
976 
977 		/* Initialize request state to FREE */
978 		tcp_req->state = TCP_REQUEST_STATE_FREE;
979 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
980 
981 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
982 		if (!tqpair->pdu) {
983 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
984 			return -1;
985 		}
986 
987 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
988 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
989 		}
990 
991 		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
992 					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
993 		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
994 		if (!tqpair->pdu_recv_buf.buf) {
995 			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
996 				    tqpair->pdu_recv_buf.size);
997 			return -1;
998 		}
999 		tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
1000 	} else {
1001 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
1002 		if (!tqpair->reqs) {
1003 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
1004 			return -1;
1005 		}
1006 
1007 		if (in_capsule_data_size) {
1008 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
1009 						    NULL, SPDK_ENV_LCORE_ID_ANY,
1010 						    SPDK_MALLOC_DMA);
1011 			if (!tqpair->bufs) {
1012 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1013 				return -1;
1014 			}
1015 		}
1016 
1017 		for (i = 0; i < size; i++) {
1018 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1019 
1020 			tcp_req->ttag = i + 1;
1021 			tcp_req->req.qpair = &tqpair->qpair;
1022 
1023 			/* Set up memory to receive commands */
1024 			if (tqpair->bufs) {
1025 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1026 			}
1027 
1028 			/* Set the cmd and rsp */
1029 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1030 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1031 
1032 			/* Initialize request state to FREE */
1033 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1034 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1035 		}
1036 
1037 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1038 		if (!tqpair->pdu_pool) {
1039 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
1040 			return -1;
1041 		}
1042 
1043 		for (i = 0; i < size; i++) {
1044 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1045 		}
1046 	}
1047 
1048 	return 0;
1049 }
1050 
1051 static int
1052 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1053 {
1054 	struct spdk_nvmf_tcp_qpair *tqpair;
1055 	int i;
1056 
1057 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1058 
1059 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1060 
1061 	TAILQ_INIT(&tqpair->send_queue);
1062 	TAILQ_INIT(&tqpair->free_queue);
1063 	STAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1064 
1065 	/* Initialise request state queues of the qpair */
1066 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1067 		TAILQ_INIT(&tqpair->state_queue[i]);
1068 	}
1069 
1070 	tqpair->host_hdgst_enable = true;
1071 	tqpair->host_ddgst_enable = true;
1072 	return 0;
1073 }
1074 
1075 static int
1076 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1077 {
1078 
1079 	int rc;
1080 	int buf_size;
1081 
1082 	/* set send buffer size */
1083 	buf_size = 2 * 1024 * 1024;
1084 	rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size);
1085 	if (rc != 0) {
1086 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
1087 		return rc;
1088 	}
1089 
1090 	/* set low water mark */
1091 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1092 	if (rc != 0) {
1093 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1094 		return rc;
1095 	}
1096 
1097 	return 0;
1098 }
1099 
1100 static void
1101 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1102 			      struct spdk_nvmf_tcp_port *port,
1103 			      struct spdk_sock *sock, new_qpair_fn cb_fn)
1104 {
1105 	struct spdk_nvmf_tcp_qpair *tqpair;
1106 	int rc;
1107 
1108 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1109 		      port->trid.traddr, port->trid.trsvcid);
1110 
1111 	if (transport->opts.sock_priority) {
1112 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1113 		if (rc) {
1114 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1115 			spdk_sock_close(&sock);
1116 			return;
1117 		}
1118 	}
1119 
1120 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1121 	if (tqpair == NULL) {
1122 		SPDK_ERRLOG("Could not allocate new connection.\n");
1123 		spdk_sock_close(&sock);
1124 		return;
1125 	}
1126 
1127 	tqpair->sock = sock;
1128 	tqpair->max_queue_depth = 1;
1129 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1130 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1131 	tqpair->port = port;
1132 	tqpair->qpair.transport = transport;
1133 
1134 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1135 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1136 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1137 			       &tqpair->initiator_port);
1138 	if (rc < 0) {
1139 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1140 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1141 		return;
1142 	}
1143 
1144 	cb_fn(&tqpair->qpair);
1145 }
1146 
1147 static void
1148 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1149 			  new_qpair_fn cb_fn)
1150 {
1151 	struct spdk_sock *sock;
1152 	int i;
1153 
1154 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1155 		sock = spdk_sock_accept(port->listen_sock);
1156 		if (sock) {
1157 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn);
1158 		}
1159 	}
1160 }
1161 
1162 static void
1163 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
1164 {
1165 	struct spdk_nvmf_tcp_transport *ttransport;
1166 	struct spdk_nvmf_tcp_port *port;
1167 
1168 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1169 
1170 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1171 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn);
1172 	}
1173 }
1174 
1175 static void
1176 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1177 		       struct spdk_nvme_transport_id *trid,
1178 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1179 {
1180 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1181 	entry->adrfam = trid->adrfam;
1182 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
1183 
1184 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1185 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1186 
1187 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1188 }
1189 
1190 static struct spdk_nvmf_transport_poll_group *
1191 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1192 {
1193 	struct spdk_nvmf_tcp_poll_group *tgroup;
1194 
1195 	tgroup = calloc(1, sizeof(*tgroup));
1196 	if (!tgroup) {
1197 		return NULL;
1198 	}
1199 
1200 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1201 	if (!tgroup->sock_group) {
1202 		goto cleanup;
1203 	}
1204 
1205 	TAILQ_INIT(&tgroup->qpairs);
1206 
1207 	return &tgroup->group;
1208 
1209 cleanup:
1210 	free(tgroup);
1211 	return NULL;
1212 }
1213 
1214 static struct spdk_nvmf_transport_poll_group *
1215 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1216 {
1217 	struct spdk_nvmf_tcp_qpair *tqpair;
1218 	struct spdk_sock_group *group = NULL;
1219 	int rc;
1220 
1221 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1222 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1223 	if (!rc && group != NULL) {
1224 		return spdk_sock_group_get_ctx(group);
1225 	}
1226 
1227 	return NULL;
1228 }
1229 
1230 static void
1231 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1232 {
1233 	struct spdk_nvmf_tcp_poll_group *tgroup;
1234 
1235 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1236 	spdk_sock_group_close(&tgroup->sock_group);
1237 
1238 	free(tgroup);
1239 }
1240 
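/* Prepare pdu_in_progress for the next PDU: rewind the receive buffer (moving
 * any partially received bytes to the front) when the next header would not
 * fit at the current offset, then point the header at the current position.
 */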
1241 static inline void
1242 spdk_nvmf_tcp_reset_pdu_in_process(struct spdk_nvmf_tcp_qpair *tqpair)
1243 {
1244 	struct nvme_tcp_pdu_recv_buf *pdu_recv_buf = &tqpair->pdu_recv_buf;
1245 	char *dst, *src;
1246 
1247 	if (spdk_unlikely((pdu_recv_buf->off + sizeof(union nvme_tcp_pdu_hdr)) >
1248 			  pdu_recv_buf->size)) {
1249 		if (pdu_recv_buf->remain_size) {
1250 			dst = pdu_recv_buf->buf;
1251 			src = (char *)((void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1252 
1253 			/* Use memmove rather than memcpy because the source and destination may overlap */
1254 			memmove(dst, src, pdu_recv_buf->remain_size);
1255 		}
1256 		tqpair->pdu_recv_buf.off = 0;
1257 	} else if (!pdu_recv_buf->remain_size) {
1258 		tqpair->pdu_recv_buf.off = 0;
1259 	}
1260 
1261 	tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)((void *)pdu_recv_buf->buf +
1262 				      pdu_recv_buf->off);
1263 }
1264 
1265 static void
1266 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1267 				   enum nvme_tcp_pdu_recv_state state)
1268 {
1269 	if (tqpair->recv_state == state) {
1270 		SPDK_ERRLOG("The recv state of tqpair=%p is already set to state(%d)\n",
1271 			    tqpair, state);
1272 		return;
1273 	}
1274 
1275 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1276 	tqpair->recv_state = state;
1277 
1278 	switch (state) {
1279 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1280 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1281 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1282 		break;
1283 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1284 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1285 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1286 		spdk_nvmf_tcp_reset_pdu_in_process(tqpair);
1287 		break;
1288 	default:
1289 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1290 		abort();
1291 		break;
1292 	}
1293 }
1294 
1295 static int
1296 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1297 {
1298 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1299 
1300 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1301 
1302 	SPDK_ERRLOG("No PDU received from tqpair=%p within %d seconds\n", tqpair,
1303 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1304 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
1305 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
1306 	spdk_poller_unregister(&tqpair->timeout_poller);
1307 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
1308 
1309 	return 0;
1310 }
1311 
1312 static void
1313 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1314 {
1315 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1316 
1317 	if (!tqpair->timeout_poller) {
1318 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1319 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1320 	}
1321 }
1322 
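/* Build and queue a C2H Termination Request PDU carrying the given Fatal
 * Error Status and a copy of (at most SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE
 * bytes of) the offending PDU's header.
 */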
1323 static void
1324 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1325 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1326 {
1327 	struct nvme_tcp_pdu *rsp_pdu;
1328 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1329 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1330 	uint32_t copy_len;
1331 
1332 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1333 	if (!rsp_pdu) {
1334 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1335 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1336 		return;
1337 	}
1338 
1339 	c2h_term_req = &rsp_pdu->hdr->term_req;
1340 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1341 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1342 
1343 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1344 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1345 		DSET32(&c2h_term_req->fei, error_offset);
1346 	}
1347 
1348 	copy_len = pdu->hdr->common.hlen;
1349 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1350 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1351 	}
1352 
1353 	/* Copy the error info into the buffer */
1354 	memcpy((uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, pdu->hdr->raw, copy_len);
1355 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, copy_len);
1356 
1357 	/* The total length includes the copied header of the offending PDU */
1358 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1359 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1360 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1361 }
1362 
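/* A Capsule Command header has arrived: allocate a request for it and start
 * processing; any in-capsule payload is handled when the payload is received.
 */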
1363 static void
1364 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1365 				     struct spdk_nvmf_tcp_qpair *tqpair,
1366 				     struct nvme_tcp_pdu *pdu)
1367 {
1368 	struct spdk_nvmf_tcp_req *tcp_req;
1369 
1370 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1371 	if (!tcp_req) {
1372 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1373 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1374 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1375 		return;
1376 	}
1377 
1378 	pdu->ctx = tcp_req;
1379 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1380 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1381 	return;
1382 }
1383 
1384 static void
1385 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1386 		struct spdk_nvmf_tcp_qpair *tqpair,
1387 		struct nvme_tcp_pdu *pdu)
1388 {
1389 	struct spdk_nvmf_tcp_req *tcp_req;
1390 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1391 	uint32_t error_offset = 0;
1392 	enum spdk_nvme_tcp_term_req_fes fes;
1393 
1394 	capsule_cmd = &pdu->hdr->capsule_cmd;
1395 	tcp_req = pdu->ctx;
1396 	assert(tcp_req != NULL);
1397 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1398 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1399 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1400 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1401 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1402 		goto err;
1403 	}
1404 
1405 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1406 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1407 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1408 
1409 	return;
1410 err:
1411 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1412 }
1413 
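/* Validate an incoming H2C Data PDU header: look up the matching request by
 * cccid and ttag, then check the data offset and length before accepting the
 * payload.
 */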
1414 static void
1415 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1416 				  struct spdk_nvmf_tcp_qpair *tqpair,
1417 				  struct nvme_tcp_pdu *pdu)
1418 {
1419 	struct spdk_nvmf_tcp_req *tcp_req;
1420 	uint32_t error_offset = 0;
1421 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1422 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1423 	bool ttag_offset_error = false;
1424 
1425 	h2c_data = &pdu->hdr->h2c_data;
1426 
1427 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1428 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1429 
1430 	/* Use the cccid and ttag from the PDU to find the matching request */
1431 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1432 		      state_link) {
1433 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1434 			break;
1435 		}
1436 
1437 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1438 			ttag_offset_error = true;
1439 		}
1440 	}
1441 
1442 	if (!tcp_req) {
1443 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1444 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1445 		if (!ttag_offset_error) {
1446 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1447 		} else {
1448 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1449 		}
1450 		goto err;
1451 	}
1452 
1453 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1454 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1455 			      "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset=%u\n",
1456 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1457 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1458 		goto err;
1459 	}
1460 
1461 	if (h2c_data->datal > tqpair->maxh2cdata) {
1462 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datal=%u exceeds maxh2cdata size=%u\n",
1463 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1464 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1465 		goto err;
1466 	}
1467 
1468 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1469 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1470 			      "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1471 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1472 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1473 		goto err;
1474 	}
1475 
1476 	pdu->ctx = tcp_req;
1477 
1478 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
1479 		pdu->dif_ctx = &tcp_req->dif_ctx;
1480 	}
1481 
1482 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1483 				  h2c_data->datao, h2c_data->datal);
1484 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1485 	return;
1486 
1487 err:
1488 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1489 }
1490 
1491 static void
1492 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1493 {
1494 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1495 	nvmf_tcp_request_free(tcp_req);
1496 }
1497 
1498 static void
1499 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1500 				    struct spdk_nvmf_tcp_qpair *tqpair)
1501 {
1502 	struct nvme_tcp_pdu *rsp_pdu;
1503 	struct spdk_nvme_tcp_rsp *capsule_resp;
1504 
1505 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1506 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1507 	if (!rsp_pdu) {
1508 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1509 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1510 		return;
1511 	}
1512 
1513 	capsule_resp = &rsp_pdu->hdr->capsule_resp;
1514 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1515 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1516 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1517 	if (tqpair->host_hdgst_enable) {
1518 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1519 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1520 	}
1521 
1522 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1523 }
1524 
1525 static void
1526 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1527 {
1528 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1529 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1530 					     struct spdk_nvmf_tcp_qpair, qpair);
1531 
1532 	assert(tqpair != NULL);
1533 	assert(tcp_req->c2h_data_pdu_num > 0);
1534 	tcp_req->c2h_data_pdu_num--;
1535 	if (!tcp_req->c2h_data_pdu_num) {
1536 		if (tqpair->qpair.transport->opts.c2h_success) {
1537 			nvmf_tcp_request_free(tcp_req);
1538 		} else {
1539 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1540 		}
1541 	}
1542 
1543 	tqpair->c2h_data_pdu_cnt--;
1544 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1545 }
1546 
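/* Queue an R2T PDU asking the host for the next chunk of data, limited to
 * maxh2cdata bytes per transfer.
 */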
1547 static void
1548 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1549 			   struct spdk_nvmf_tcp_req *tcp_req)
1550 {
1551 	struct nvme_tcp_pdu *rsp_pdu;
1552 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1553 
1554 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1555 	if (!rsp_pdu) {
1556 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1557 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1558 		return;
1559 	}
1560 
1561 	r2t = &rsp_pdu->hdr->r2t;
1562 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1563 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1564 
1565 	if (tqpair->host_hdgst_enable) {
1566 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1567 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1568 	}
1569 
1570 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1571 	r2t->ttag = tcp_req->ttag;
1572 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1573 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1574 	tcp_req->r2tl_remain = r2t->r2tl;
1575 
1576 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1577 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1578 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1579 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1580 }
1581 
1582 static void
1583 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1584 				      struct spdk_nvmf_tcp_qpair *tqpair,
1585 				      struct nvme_tcp_pdu *pdu)
1586 {
1587 	struct spdk_nvmf_tcp_req *tcp_req;
1588 
1589 	tcp_req = pdu->ctx;
1590 	assert(tcp_req != NULL);
1591 
1592 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1593 
1594 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1595 	tcp_req->r2tl_remain -= pdu->data_len;
1596 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1597 
1598 	if (!tcp_req->r2tl_remain) {
1599 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1600 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1601 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1602 		} else {
1603 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1604 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1605 		}
1606 	}
1607 }
1608 
1609 static void
1610 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1611 {
1612 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1613 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1614 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1615 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1616 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1617 			      DGET32(h2c_term_req->fei));
1618 	}
1619 }
1620 
1621 static void
1622 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1623 				      struct nvme_tcp_pdu *pdu)
1624 {
1625 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1626 	uint32_t error_offset = 0;
1627 	enum spdk_nvme_tcp_term_req_fes fes;
1628 
1629 
1630 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1631 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1632 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1633 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1634 		goto end;
1635 	}
1636 
1637 	/* set the data buffer */
1638 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr->raw + h2c_term_req->common.hlen,
1639 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1640 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1641 	return;
1642 end:
1643 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1644 	return;
1645 }
1646 
1647 static void
1648 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1649 		struct nvme_tcp_pdu *pdu)
1650 {
1651 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1652 
1653 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1654 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1655 	return;
1656 }
1657 
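/*
 * Called once the complete payload of the in-progress PDU has been read.
 * If the host enabled data digest, the CRC32C over the payload is verified
 * first and a mismatch triggers a C2H termination request. Otherwise the
 * payload is dispatched to the per-type handler (capsule command, H2C data,
 * or H2C termination request).
 */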
1658 static void
1659 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1660 {
1661 	int rc = 0;
1662 	struct nvme_tcp_pdu *pdu;
1663 	uint32_t crc32c, error_offset = 0;
1664 	enum spdk_nvme_tcp_term_req_fes fes;
1665 	struct spdk_nvmf_tcp_transport *ttransport;
1666 
1667 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1668 	pdu = &tqpair->pdu_in_progress;
1669 
1670 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1671 	/* check data digest if needed */
1672 	if (pdu->ddgst_enable) {
1673 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1674 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1675 		if (rc == 0) {
1676 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1677 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1678 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1679 			return;
1680 
1681 		}
1682 	}
1683 
1684 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1685 	switch (pdu->hdr->common.pdu_type) {
1686 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1687 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1688 		break;
1689 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1690 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1691 		break;
1692 
1693 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1694 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1695 		break;
1696 
1697 	default:
1698 		/* The code should never reach here */
1699 		SPDK_ERRLOG("The code should never reach here\n");
1700 		break;
1701 	}
1702 }
1703 
1704 static void
1705 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1706 {
1707 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1708 
1709 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1710 }
1711 
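/*
 * Handle an ICReq PDU: validate the PDU format version, record whether the
 * host requested header/data digests, clamp the requested HPDA to
 * SPDK_NVME_TCP_CPDA_MAX, and reply with an ICResp. maxh2cdata is advertised
 * as min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE, io_unit_size); e.g. with the default
 * 131072-byte io_unit_size both terms are 128 KiB, so maxh2cdata is 131072.
 * The qpair only enters NVME_TCP_QPAIR_STATE_RUNNING after the ICResp has
 * actually been written (see spdk_nvmf_tcp_send_icresp_complete above).
 */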
1712 static void
1713 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1714 			   struct spdk_nvmf_tcp_qpair *tqpair,
1715 			   struct nvme_tcp_pdu *pdu)
1716 {
1717 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr->ic_req;
1718 	struct nvme_tcp_pdu *rsp_pdu;
1719 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1720 	uint32_t error_offset = 0;
1721 	enum spdk_nvme_tcp_term_req_fes fes;
1722 
1723 	/* Only PFV 0 is defined currently */
1724 	if (ic_req->pfv != 0) {
1725 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1726 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1727 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1728 		goto end;
1729 	}
1730 
1731 	/* MAXR2T is 0-based */
1732 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1733 
1734 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1735 	if (!tqpair->host_hdgst_enable) {
1736 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1737 	}
1738 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1739 	if (!tqpair->host_ddgst_enable) {
1740 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1741 	}
1742 
1743 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1744 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1745 
1746 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1747 	if (!rsp_pdu) {
1748 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1749 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1750 		return;
1751 	}
1752 
1753 	ic_resp = &rsp_pdu->hdr->ic_resp;
1754 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1755 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1756 	ic_resp->pfv = 0;
1757 	ic_resp->cpda = tqpair->cpda;
1758 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1759 				      ttransport->transport.opts.io_unit_size);
1760 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1761 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1762 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1763 
1764 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1765 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1766 
1767 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1768 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1769 	return;
1770 end:
1771 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1772 	return;
1773 }
1774 
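/*
 * Called once the PDU-specific header (PSH) has been read. If the common
 * header indicated a header digest, the CRC32C over the header is verified
 * before dispatching by pdu_type to the ICReq, capsule command, H2C data, or
 * H2C termination request header handlers. An unknown type results in a C2H
 * termination request with an "invalid header field" FES.
 */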
1775 static void
1776 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1777 {
1778 	struct nvme_tcp_pdu *pdu;
1779 	int rc;
1780 	uint32_t crc32c, error_offset = 0;
1781 	enum spdk_nvme_tcp_term_req_fes fes;
1782 	struct spdk_nvmf_tcp_transport *ttransport;
1783 
1784 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1785 	pdu = &tqpair->pdu_in_progress;
1786 
1787 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1788 		      pdu->hdr->common.pdu_type);
1789 	/* check header digest if needed */
1790 	if (pdu->has_hdgst) {
1791 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1792 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1793 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr->raw + pdu->hdr->common.hlen, crc32c);
1794 		if (rc == 0) {
1795 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1796 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1797 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1798 			return;
1799 
1800 		}
1801 	}
1802 
1803 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1804 	switch (pdu->hdr->common.pdu_type) {
1805 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1806 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1807 		break;
1808 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1809 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1810 		break;
1811 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1812 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1813 		break;
1814 
1815 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1816 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1817 		break;
1818 
1819 	default:
1820 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr->common.pdu_type);
1821 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1822 		error_offset = 1;
1823 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1824 		break;
1825 	}
1826 }
1827 
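/*
 * Validate the common header (CH) of the in-progress PDU before reading the
 * PSH: an ICReq must be the first PDU on the connection, every other type
 * requires the qpair to be RUNNING, hlen must match the expected header size
 * for the type, and plen is sanity checked. For CAPSULE_CMD and H2C_DATA the
 * PDU data offset (pdo) is checked against the advertised CPDA: with a
 * non-zero cpda this code expects pdo == (cpda + 1) << 2 (e.g. 16 when
 * cpda == 3). Any violation results in a C2H termination request.
 */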
1828 static void
1829 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1830 {
1831 	struct nvme_tcp_pdu *pdu;
1832 	uint32_t error_offset = 0;
1833 	enum spdk_nvme_tcp_term_req_fes fes;
1834 	uint8_t expected_hlen, pdo;
1835 	bool plen_error = false, pdo_error = false;
1836 
1837 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1838 	pdu = &tqpair->pdu_in_progress;
1839 
1840 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1841 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1842 			SPDK_ERRLOG("Already received an ICreq PDU, rejecting this pdu=%p\n", pdu);
1843 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1844 			goto err;
1845 		}
1846 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1847 		if (pdu->hdr->common.plen != expected_hlen) {
1848 			plen_error = true;
1849 		}
1850 	} else {
1851 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1852 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
1853 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1854 			goto err;
1855 		}
1856 
1857 		switch (pdu->hdr->common.pdu_type) {
1858 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1859 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1860 			pdo = pdu->hdr->common.pdo;
1861 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1862 				pdo_error = true;
1863 				break;
1864 			}
1865 
1866 			if (pdu->hdr->common.plen < expected_hlen) {
1867 				plen_error = true;
1868 			}
1869 			break;
1870 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1871 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1872 			pdo = pdu->hdr->common.pdo;
1873 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1874 				pdo_error = true;
1875 				break;
1876 			}
1877 			if (pdu->hdr->common.plen < expected_hlen) {
1878 				plen_error = true;
1879 			}
1880 			break;
1881 
1882 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1883 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1884 			if ((pdu->hdr->common.plen <= expected_hlen) ||
1885 			    (pdu->hdr->common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1886 				plen_error = true;
1887 			}
1888 			break;
1889 
1890 		default:
1891 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr->common.pdu_type);
1892 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1893 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1894 			goto err;
1895 		}
1896 	}
1897 
1898 	if (pdu->hdr->common.hlen != expected_hlen) {
1899 		SPDK_ERRLOG("PDU type=0x%02x, Expected header length %u, got %u on tqpair=%p\n",
1900 			    pdu->hdr->common.pdu_type,
1901 			    expected_hlen, pdu->hdr->common.hlen, tqpair);
1902 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1903 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1904 		goto err;
1905 	} else if (pdo_error) {
1906 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1907 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1908 	} else if (plen_error) {
1909 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1910 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1911 		goto err;
1912 	} else {
1913 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1914 		nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
1915 		return;
1916 	}
1917 err:
1918 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1919 }
1920 
1921 static int
1922 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1923 				int read_len)
1924 {
1925 	int rc;
1926 
1927 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1928 				      read_offset, read_len, pdu->dif_ctx);
1929 	if (rc != 0) {
1930 		SPDK_ERRLOG("DIF generate failed\n");
1931 	}
1932 
1933 	return rc;
1934 }
1935 
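/*
 * The helpers below implement the per-qpair receive staging buffer:
 * nvme_tcp_recv_buf_read() pulls as many bytes as fit from the socket,
 * nvme_tcp_read_data_from_pdu_recv_buf() copies header bytes out of the
 * staging buffer (or just consumes them when dst is NULL), and
 * nvme_tcp_read_payload_data_from_pdu_recv_buf() scatters buffered payload
 * bytes into the PDU's iovecs. The payload of an H2C termination request is
 * consumed without copying, since only its header is used for reporting.
 */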
1936 static int
1937 nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
1938 {
1939 	int rc;
1940 
1941 	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size - pdu_recv_buf->off,
1942 				(void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1943 	if (rc < 0) {
1944 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
1945 	} else if (rc > 0) {
1946 		pdu_recv_buf->remain_size = rc;
1947 		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1948 	}
1949 
1950 	return rc;
1951 }
1952 
1953 static uint32_t
1954 nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1955 				     uint32_t expected_size,
1956 				     char *dst)
1957 {
1958 	uint32_t size;
1959 
1960 	assert(pdu_recv_buf->remain_size > 0);
1961 	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
1962 	if (dst) {
1963 		memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
1964 	}
1965 	pdu_recv_buf->off += size;
1966 	pdu_recv_buf->remain_size -= size;
1967 
1968 
1969 	return size;
1970 }
1971 
1972 static int
1973 nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1974 		struct nvme_tcp_pdu *pdu)
1975 {
1976 	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
1977 	int iovcnt, i;
1978 	uint32_t size = 0;
1979 	void *dst;
1980 
1981 	assert(pdu_recv_buf->remain_size > 0);
1982 	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
1983 					     pdu->ddgst_enable, NULL);
1984 	assert(iovcnt >= 0);
1985 	for (i = 0; i < iovcnt; i++) {
1986 		if (!pdu_recv_buf->remain_size) {
1987 			break;
1988 		}
1989 
1990 		dst = NULL;
1991 		if (pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) {
1992 			dst = iov[i].iov_base;
1993 		}
1994 		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, dst);
1995 	}
1996 
1997 	return size;
1998 }
1999 
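/*
 * Per-qpair receive state machine. Roughly:
 *   AWAIT_PDU_READY / AWAIT_PDU_CH : read the common PDU header
 *                                    (sizeof(struct spdk_nvme_tcp_common_pdu_hdr) bytes)
 *   AWAIT_PDU_PSH                  : read the PDU-specific header (+ header digest)
 *   AWAIT_PDU_PAYLOAD              : read the payload (+ data digest), possibly in
 *                                    several passes, inserting DIF metadata when a
 *                                    DIF context is attached to the PDU
 *   ERROR                          : drain 1 byte at a time until the peer closes
 * The loop keeps running as long as a pass changes the state, so several
 * back-to-back transitions can happen on a single poll. NVME_TCP_PDU_FATAL is
 * returned on unrecoverable errors, NVME_TCP_PDU_IN_PROGRESS while a PDU is
 * still being assembled.
 */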
2000 static int
2001 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
2002 {
2003 	int rc = 0;
2004 	struct nvme_tcp_pdu *pdu;
2005 	enum nvme_tcp_pdu_recv_state prev_state;
2006 	uint32_t data_len;
2007 
2008 	/* The loop here is to allow for several back-to-back state changes. */
2009 	do {
2010 		prev_state = tqpair->recv_state;
2011 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
2012 
2013 		pdu = &tqpair->pdu_in_progress;
2014 		switch (tqpair->recv_state) {
2015 		/* Wait for the common header  */
2016 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
2017 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
2018 			if (!tqpair->pdu_recv_buf.remain_size) {
2019 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2020 				if (rc <= 0) {
2021 					return rc;
2022 				}
2023 			}
2024 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2025 					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
2026 					NULL);
2027 			pdu->ch_valid_bytes += rc;
2028 			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
2029 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2030 			}
2031 
2032 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
2033 				return NVME_TCP_PDU_IN_PROGRESS;
2034 			}
2035 
2036 			/* The common header of this PDU has now been read from the socket. */
2037 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
2038 			break;
2039 		/* Wait for the pdu specific header  */
2040 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2041 			if (!tqpair->pdu_recv_buf.remain_size) {
2042 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2043 				if (rc <= 0) {
2044 					return rc;
2045 				}
2046 			}
2047 
2048 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2049 					pdu->psh_len - pdu->psh_valid_bytes,
2050 					NULL);
2051 			pdu->psh_valid_bytes += rc;
2052 			if (pdu->psh_valid_bytes < pdu->psh_len) {
2053 				return NVME_TCP_PDU_IN_PROGRESS;
2054 			}
2055 
2056 			/* All headers (CH, PSH and header digest) of this PDU have now been read from the socket. */
2057 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
2058 			break;
2059 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2060 			/* check whether the data is valid, if not we just return */
2061 			/* Check whether there is payload data to read; if not, just return. */
2062 				return NVME_TCP_PDU_IN_PROGRESS;
2063 			}
2064 
2065 			data_len = pdu->data_len;
2066 			/* data digest */
2067 			if (spdk_unlikely((pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2068 					  tqpair->host_ddgst_enable)) {
2069 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
2070 				pdu->ddgst_enable = true;
2071 			}
2072 
2073 			if (tqpair->pdu_recv_buf.remain_size) {
2074 				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
2075 				pdu->readv_offset += rc;
2076 			}
2077 
2078 			if (pdu->readv_offset < data_len) {
2079 				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2080 				if (rc < 0) {
2081 					return NVME_TCP_PDU_IN_PROGRESS;
2082 				}
2083 				pdu->readv_offset += rc;
2084 			}
2085 
2086 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
2087 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
2088 				if (rc != 0) {
2089 					return NVME_TCP_PDU_FATAL;
2090 				}
2091 			}
2092 
2093 			if (pdu->readv_offset < data_len) {
2094 				return NVME_TCP_PDU_IN_PROGRESS;
2095 			}
2096 
2097 			/* All of this PDU has now been read from the socket. */
2098 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2099 			break;
2100 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2101 			/* Check whether the connection is closed. Only 1 byte is read at a time. */
2102 			rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr->common);
2103 			if (rc < 0) {
2104 				return NVME_TCP_PDU_FATAL;
2105 			}
2106 			break;
2107 		default:
2108 			assert(0);
2109 			SPDK_ERRLOG("The code should never reach here\n");
2110 			break;
2111 		}
2112 	} while (tqpair->recv_state != prev_state);
2113 
2114 	return rc;
2115 }
2116 
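/*
 * Derive the data transfer direction for a request: fabrics commands use the
 * fctype, everything else the opcode. Admin Get/Set Features are special
 * cased because only a few feature identifiers carry a data buffer, and any
 * SGL describing a zero-length buffer is reported as SPDK_NVME_DATA_NONE.
 */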
2117 static enum spdk_nvme_data_transfer
2118 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2119 	enum spdk_nvme_data_transfer xfer;
2120 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2121 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2122 
2123 	/* Figure out data transfer direction */
2124 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2125 	{
2126 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2127 	} else
2128 	{
2129 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2130 
2131 		/* Some admin commands are special cases */
2132 		if ((tcp_req->req.qpair->qid == 0) &&
2133 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2134 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2135 			switch (cmd->cdw10 & 0xff) {
2136 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2137 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2138 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2139 				break;
2140 			default:
2141 				xfer = SPDK_NVME_DATA_NONE;
2142 			}
2143 		}
2144 	}
2145 
2146 	if (xfer == SPDK_NVME_DATA_NONE)
2147 	{
2148 		return xfer;
2149 	}
2150 
2151 	/* Even for commands that may transfer data, they could have specified 0 length.
2152 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2153 	 */
2154 	switch (sgl->generic.type)
2155 	{
2156 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2157 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2158 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2159 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2160 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2161 		if (sgl->unkeyed.length == 0) {
2162 			xfer = SPDK_NVME_DATA_NONE;
2163 		}
2164 		break;
2165 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2166 		if (sgl->keyed.length == 0) {
2167 			xfer = SPDK_NVME_DATA_NONE;
2168 		}
2169 		break;
2170 	}
2171 
2172 	return xfer;
2173 }
2174 
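/*
 * Parse the command's SGL1 descriptor. Two layouts are accepted:
 *  - a transport data block (transport subtype): the data is not in the
 *    capsule, so buffers are taken from the shared pool (the request is
 *    queued if none are available) and the data moves via R2T/H2C or C2H PDUs;
 *  - a data block with the offset subtype: in-capsule data, which must fit
 *    within in_capsule_data_size and is served from the request's
 *    pre-allocated buffer (tcp_req->buf).
 * Length and offset violations fail the command with the matching SGL status
 * code.
 */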
2175 static int
2176 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
2177 			    struct spdk_nvmf_transport *transport,
2178 			    struct spdk_nvmf_transport_poll_group *group)
2179 {
2180 	struct spdk_nvmf_request		*req = &tcp_req->req;
2181 	struct spdk_nvme_cmd			*cmd;
2182 	struct spdk_nvme_cpl			*rsp;
2183 	struct spdk_nvme_sgl_descriptor		*sgl;
2184 	uint32_t				length;
2185 
2186 	cmd = &req->cmd->nvme_cmd;
2187 	rsp = &req->rsp->nvme_cpl;
2188 	sgl = &cmd->dptr.sgl1;
2189 
2190 	length = sgl->unkeyed.length;
2191 
2192 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2193 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2194 		if (length > transport->opts.max_io_size) {
2195 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2196 				    length, transport->opts.max_io_size);
2197 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2198 			return -1;
2199 		}
2200 
2201 		/* fill request length and populate iovs */
2202 		req->length = length;
2203 
2204 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2205 
2206 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2207 			tcp_req->orig_length = length;
2208 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2209 			tcp_req->elba_length = length;
2210 		}
2211 
2212 		if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
2213 			/* No available buffers. Queue this request up. */
2214 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2215 				      tcp_req);
2216 			return 0;
2217 		}
2218 
2219 		/* backward compatible: keep req->data pointing at the first iov */
2220 		req->data = req->iov[0].iov_base;
2221 
2222 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2223 			      tcp_req, req->iovcnt, req->data);
2224 
2225 		return 0;
2226 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2227 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2228 		uint64_t offset = sgl->address;
2229 		uint32_t max_len = transport->opts.in_capsule_data_size;
2230 
2231 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2232 			      offset, length);
2233 
2234 		if (offset > max_len) {
2235 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2236 				    offset, max_len);
2237 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2238 			return -1;
2239 		}
2240 		max_len -= (uint32_t)offset;
2241 
2242 		if (length > max_len) {
2243 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2244 				    length, max_len);
2245 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2246 			return -1;
2247 		}
2248 
2249 		req->data = tcp_req->buf + offset;
2250 		req->data_from_pool = false;
2251 		req->length = length;
2252 
2253 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2254 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2255 			tcp_req->elba_length = length;
2256 		}
2257 
2258 		req->iov[0].iov_base = tcp_req->req.data;
2259 		req->iov[0].iov_len = length;
2260 		req->iovcnt = 1;
2261 
2262 		return 0;
2263 	}
2264 
2265 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2266 		    sgl->generic.type, sgl->generic.subtype);
2267 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2268 	return -1;
2269 }
2270 
2271 static inline enum spdk_nvme_media_error_status_code
2272 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
2273 	enum spdk_nvme_media_error_status_code result;
2274 
2275 	switch (err_type)
2276 	{
2277 	case SPDK_DIF_REFTAG_ERROR:
2278 		result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
2279 		break;
2280 	case SPDK_DIF_APPTAG_ERROR:
2281 		result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
2282 		break;
2283 	case SPDK_DIF_GUARD_ERROR:
2284 		result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
2285 		break;
2286 	default:
2287 		SPDK_UNREACHABLE();
2288 		break;
2289 	}
2290 
2291 	return result;
2292 }
2293 
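/*
 * Build and queue one C2H_DATA PDU for the request at the head of
 * queued_c2h_data_tcp_req. At most NVMF_TCP_PDU_MAX_C2H_DATA_SIZE bytes are
 * carried per PDU, so large reads are split across several PDUs. If the host
 * advertised an HPDA, padding is inserted so the data starts at the
 * (cpda + 1) << 2 byte boundary; e.g. a cpda of 7 places the data at offset
 * 32. On the final PDU the LAST_PDU flag is set, plus the SUCCESS flag when
 * the c2h_success option is enabled.
 */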
2294 static void
2295 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2296 			    struct spdk_nvmf_tcp_req *tcp_req)
2297 {
2298 	struct nvme_tcp_pdu *rsp_pdu;
2299 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2300 	uint32_t plen, pdo, alignment;
2301 	int rc;
2302 
2303 	assert(tcp_req == STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req));
2304 
2305 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2306 
2307 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2308 	assert(rsp_pdu != NULL);
2309 
2310 	c2h_data = &rsp_pdu->hdr->c2h_data;
2311 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2312 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2313 
2314 	if (tqpair->host_hdgst_enable) {
2315 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2316 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2317 	}
2318 
2319 	/* set the psh */
2320 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2321 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2322 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2323 	c2h_data->datao = tcp_req->c2h_data_offset;
2324 
2325 	/* set the padding */
2326 	rsp_pdu->padding_len = 0;
2327 	pdo = plen;
2328 	if (tqpair->cpda) {
2329 		alignment = (tqpair->cpda + 1) << 2;
2330 		if (alignment > plen) {
2331 			rsp_pdu->padding_len = alignment - plen;
2332 			pdo = plen = alignment;
2333 		}
2334 	}
2335 
2336 	c2h_data->common.pdo = pdo;
2337 	plen += c2h_data->datal;
2338 	if (tqpair->host_ddgst_enable) {
2339 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2340 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2341 	}
2342 
2343 	c2h_data->common.plen = plen;
2344 
2345 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2346 		rsp_pdu->dif_ctx = &tcp_req->dif_ctx;
2347 	}
2348 
2349 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2350 				  c2h_data->datao, c2h_data->datal);
2351 
2352 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2353 		struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
2354 		struct spdk_dif_error err_blk = {};
2355 
2356 		rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
2357 					    0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
2358 		if (rc != 0) {
2359 			SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2360 				    err_blk.err_type, err_blk.err_offset);
2361 			rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
2362 			rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
2363 			STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2364 			spdk_nvmf_tcp_pdu_put(tqpair, rsp_pdu);
2365 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2366 			return;
2367 		}
2368 	}
2369 
2370 	tcp_req->c2h_data_offset += c2h_data->datal;
2371 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2372 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2373 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2374 		if (tqpair->qpair.transport->opts.c2h_success) {
2375 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2376 		}
2377 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2378 	}
2379 
2380 	tqpair->c2h_data_pdu_cnt += 1;
2381 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2382 }
2383 
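/*
 * Number of C2H_DATA PDUs needed for a request: a ceiling division of the
 * request length by NVMF_TCP_PDU_MAX_C2H_DATA_SIZE. For example, a 256 KiB
 * read with the 128 KiB per-PDU limit yields 2 PDUs.
 */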
2384 static int
2385 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2386 {
2387 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2388 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2389 }
2390 
2391 static void
2392 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2393 {
2394 	struct spdk_nvmf_tcp_req *tcp_req;
2395 
2396 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2397 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2398 		tcp_req = STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2399 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2400 	}
2401 }
2402 
2403 static void
2404 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2405 			     struct spdk_nvmf_tcp_qpair *tqpair)
2406 {
2407 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2408 
2409 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2410 
2411 	STAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2412 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2413 }
2414 
2415 static int
2416 request_transfer_out(struct spdk_nvmf_request *req)
2417 {
2418 	struct spdk_nvmf_tcp_req	*tcp_req;
2419 	struct spdk_nvmf_qpair		*qpair;
2420 	struct spdk_nvmf_tcp_qpair	*tqpair;
2421 	struct spdk_nvme_cpl		*rsp;
2422 
2423 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2424 
2425 	qpair = req->qpair;
2426 	rsp = &req->rsp->nvme_cpl;
2427 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2428 
2429 	/* Advance our sq_head pointer */
2430 	if (qpair->sq_head == qpair->sq_head_max) {
2431 		qpair->sq_head = 0;
2432 	} else {
2433 		qpair->sq_head++;
2434 	}
2435 	rsp->sqhd = qpair->sq_head;
2436 
2437 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2438 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2439 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2440 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2441 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2442 	} else {
2443 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2444 	}
2445 
2446 	return 0;
2447 }
2448 
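/*
 * Start the host-to-controller transfer for a write: if the data buffers came
 * from the shared pool the data is still on the host, so an R2T is sent;
 * otherwise the data arrived in the capsule and the in-progress PDU is simply
 * pointed at the request's iovecs so the payload can be read in place.
 */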
2449 static void
2450 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2451 				   struct spdk_nvmf_tcp_req *tcp_req)
2452 {
2453 	struct nvme_tcp_pdu *pdu;
2454 
2455 	if (tcp_req->req.data_from_pool) {
2456 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2457 		tcp_req->next_expected_r2t_offset = 0;
2458 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2459 	} else {
2460 		pdu = &tqpair->pdu_in_progress;
2461 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2462 			      tqpair);
2463 		/* No need to send an R2T; the data is already contained in the capsule. */
2464 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2465 					  0, tcp_req->req.length);
2466 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2467 	}
2468 }
2469 
2470 static void
2471 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2472 				 struct spdk_nvmf_tcp_req *tcp_req)
2473 {
2474 	struct nvme_tcp_pdu *pdu;
2475 	uint32_t plen = 0;
2476 
2477 	pdu = &tqpair->pdu_in_progress;
2478 	plen = pdu->hdr->common.hlen;
2479 
2480 	if (tqpair->host_hdgst_enable) {
2481 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2482 	}
2483 
2484 	if (pdu->hdr->common.plen != plen) {
2485 		tcp_req->has_incapsule_data = true;
2486 	}
2487 }
2488 
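/*
 * Drive a request through the spdk_nvmf_tcp_req_state machine (NEW ->
 * NEED_BUFFER -> optional TRANSFERRING_HOST_TO_CONTROLLER -> READY_TO_EXECUTE
 * -> EXECUTING -> EXECUTED -> READY_TO_COMPLETE ->
 * TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE). The loop repeats as
 * long as a pass changes the state; states that wait on external events
 * (buffer availability, H2C data, bdev completion, C2H transfer) simply break
 * out and are re-entered later. Returns true if any state transition was made.
 */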
2489 static bool
2490 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2491 			  struct spdk_nvmf_tcp_req *tcp_req)
2492 {
2493 	struct spdk_nvmf_tcp_qpair		*tqpair;
2494 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2495 	int					rc;
2496 	enum spdk_nvmf_tcp_req_state		prev_state;
2497 	bool					progress = false;
2498 	struct spdk_nvmf_transport		*transport = &ttransport->transport;
2499 	struct spdk_nvmf_transport_poll_group	*group;
2500 
2501 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2502 	group = &tqpair->group->group;
2503 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2504 
2505 	/* The loop here is to allow for several back-to-back state changes. */
2506 	do {
2507 		prev_state = tcp_req->state;
2508 
2509 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2510 			      tqpair);
2511 
2512 		switch (tcp_req->state) {
2513 		case TCP_REQUEST_STATE_FREE:
2514 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2515 			 * to escape this state. */
2516 			break;
2517 		case TCP_REQUEST_STATE_NEW:
2518 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2519 
2520 			/* copy the cmd from the receive pdu */
2521 			tcp_req->cmd = tqpair->pdu_in_progress.hdr->capsule_cmd.ccsqe;
2522 
2523 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->dif_ctx))) {
2524 				tcp_req->dif_insert_or_strip = true;
2525 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->dif_ctx;
2526 			}
2527 
2528 			/* The next state transition depends on the data transfer needs of this request. */
2529 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2530 
2531 			/* If no data to transfer, ready to execute. */
2532 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2533 				/* Reset the tqpair receiving pdu state */
2534 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2535 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2536 				break;
2537 			}
2538 
2539 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2540 
2541 			if (!tcp_req->has_incapsule_data) {
2542 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2543 			}
2544 
2545 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2546 			STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
2547 			break;
2548 		case TCP_REQUEST_STATE_NEED_BUFFER:
2549 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2550 
2551 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2552 
2553 			if (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue)) {
2554 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2555 					      "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
2556 					      tcp_req, tqpair);
2557 				/* This request needs to wait in line to obtain a buffer */
2558 				break;
2559 			}
2560 
2561 			/* Try to get a data buffer */
2562 			rc = spdk_nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
2563 			if (rc < 0) {
2564 				STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
2565 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2566 				/* Reset the tqpair receiving pdu state */
2567 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2568 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2569 				break;
2570 			}
2571 
2572 			if (!tcp_req->req.data) {
2573 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2574 					      tcp_req, tqpair);
2575 				/* No buffers available. */
2576 				break;
2577 			}
2578 
2579 			STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
2580 
2581 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2582 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2583 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2584 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2585 				break;
2586 			}
2587 
2588 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2589 			break;
2590 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2591 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2592 					  (uintptr_t)tcp_req, 0);
2593 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2594 			 * to escape this state. */
2595 			break;
2596 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2597 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2598 
2599 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2600 				assert(tcp_req->elba_length >= tcp_req->req.length);
2601 				tcp_req->req.length = tcp_req->elba_length;
2602 			}
2603 
2604 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2605 			spdk_nvmf_request_exec(&tcp_req->req);
2606 			break;
2607 		case TCP_REQUEST_STATE_EXECUTING:
2608 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2609 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2610 			 * to escape this state. */
2611 			break;
2612 		case TCP_REQUEST_STATE_EXECUTED:
2613 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2614 
2615 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2616 				tcp_req->req.length = tcp_req->orig_length;
2617 			}
2618 
2619 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2620 			break;
2621 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2622 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2623 			rc = request_transfer_out(&tcp_req->req);
2624 			assert(rc == 0); /* No good way to handle this currently */
2625 			break;
2626 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2627 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2628 					  (uintptr_t)tcp_req,
2629 					  0);
2630 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2631 			 * to escape this state. */
2632 			break;
2633 		case TCP_REQUEST_STATE_COMPLETED:
2634 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2635 			if (tcp_req->req.data_from_pool) {
2636 				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
2637 			}
2638 			tcp_req->req.length = 0;
2639 			tcp_req->req.iovcnt = 0;
2640 			tcp_req->req.data = NULL;
2641 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2642 			break;
2643 		case TCP_REQUEST_NUM_STATES:
2644 		default:
2645 			assert(0);
2646 			break;
2647 		}
2648 
2649 		if (tcp_req->state != prev_state) {
2650 			progress = true;
2651 		}
2652 	} while (tcp_req->state != prev_state);
2653 
2654 	return progress;
2655 }
2656 
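/*
 * Socket-readable callback registered with the poll group's sock group. It
 * runs the receive state machine and tears the qpair down if the socket was
 * closed (rc < 0) or the qpair moved to NVME_TCP_QPAIR_STATE_EXITING due to
 * an internal error.
 */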
2657 static void
2658 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2659 {
2660 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2661 	int rc;
2662 
2663 	assert(tqpair != NULL);
2664 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2665 
2666 	/* Disconnect the qpair if either of the following holds:
2667 	 * rc < 0: the socket was closed
2668 	 * tqpair state: the tqpair is in the EXITING state due to an internal error
2669 	 */
2670 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2671 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2672 		spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
2673 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
2674 		spdk_poller_unregister(&tqpair->timeout_poller);
2675 		spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
2676 	}
2677 }
2678 
2679 static int
2680 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2681 			     struct spdk_nvmf_qpair *qpair)
2682 {
2683 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2684 	struct spdk_nvmf_tcp_qpair	*tqpair;
2685 	int				rc;
2686 
2687 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2688 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2689 
2690 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2691 				      spdk_nvmf_tcp_sock_cb, tqpair);
2692 	if (rc != 0) {
2693 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2694 			    spdk_strerror(errno), errno);
2695 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2696 		return -1;
2697 	}
2698 
2699 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2700 	if (rc != 0) {
2701 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2702 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2703 		return -1;
2704 	}
2705 
2706 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2707 	if (rc < 0) {
2708 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2709 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2710 		return -1;
2711 	}
2712 
2713 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2714 	if (rc < 0) {
2715 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2716 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2717 		return -1;
2718 	}
2719 
2720 	tqpair->group = tgroup;
2721 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2722 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2723 
2724 	return 0;
2725 }
2726 
2727 static int
2728 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2729 				struct spdk_nvmf_qpair *qpair)
2730 {
2731 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2732 	struct spdk_nvmf_tcp_qpair		*tqpair;
2733 	int				rc;
2734 
2735 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2736 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2737 
2738 	assert(tqpair->group == tgroup);
2739 
2740 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2741 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2742 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2743 	if (rc != 0) {
2744 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2745 			    spdk_strerror(errno), errno);
2746 	}
2747 
2748 	return rc;
2749 }
2750 
2751 static int
2752 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2753 {
2754 	struct spdk_nvmf_tcp_transport *ttransport;
2755 	struct spdk_nvmf_tcp_req *tcp_req;
2756 
2757 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2758 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2759 
2760 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2761 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2762 
2763 	return 0;
2764 }
2765 
2766 static void
2767 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2768 {
2769 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2770 
2771 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2772 }
2773 
2774 static int
2775 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2776 {
2777 	struct spdk_nvmf_tcp_poll_group *tgroup;
2778 	int rc;
2779 	struct spdk_nvmf_request *req, *req_tmp;
2780 	struct spdk_nvmf_tcp_req *tcp_req;
2781 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2782 			struct spdk_nvmf_tcp_transport, transport);
2783 
2784 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2785 
2786 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2787 		return 0;
2788 	}
2789 
2790 	STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
2791 		tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2792 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2793 			break;
2794 		}
2795 	}
2796 
2797 	rc = spdk_sock_group_poll(tgroup->sock_group);
2798 	if (rc < 0) {
2799 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2800 	}
2801 
2802 	return rc;
2803 }
2804 
2805 static int
2806 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2807 			     struct spdk_nvme_transport_id *trid, bool peer)
2808 {
2809 	struct spdk_nvmf_tcp_qpair     *tqpair;
2810 	uint16_t			port;
2811 
2812 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2813 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2814 
2815 	if (peer) {
2816 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2817 		port = tqpair->initiator_port;
2818 	} else {
2819 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2820 		port = tqpair->target_port;
2821 	}
2822 
2823 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2824 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2825 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2826 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2827 	} else {
2828 		return -1;
2829 	}
2830 
2831 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2832 	return 0;
2833 }
2834 
2835 static int
2836 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2837 				   struct spdk_nvme_transport_id *trid)
2838 {
2839 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2840 }
2841 
2842 static int
2843 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2844 				  struct spdk_nvme_transport_id *trid)
2845 {
2846 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2847 }
2848 
2849 static int
2850 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2851 				    struct spdk_nvme_transport_id *trid)
2852 {
2853 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2854 }
2855 
2856 static int
2857 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2858 {
2859 	struct spdk_nvmf_tcp_qpair     *tqpair;
2860 	int rc;
2861 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2862 
2863 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2864 	if (!rc) {
2865 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2866 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2867 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2868 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2869 			      tqpair->max_queue_depth, tqpair);
2870 	}
2871 
2872 	return rc;
2873 
2874 }
2875 
2876 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2877 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2878 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2879 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2880 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2881 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2882 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2883 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2884 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2885 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2886 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2887 
2888 static void
2889 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2890 {
2891 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2892 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2893 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2894 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2895 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2896 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2897 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2898 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2899 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2900 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2901 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2902 }
2903 
2904 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2905 	.type = SPDK_NVME_TRANSPORT_TCP,
2906 	.opts_init = spdk_nvmf_tcp_opts_init,
2907 	.create = spdk_nvmf_tcp_create,
2908 	.destroy = spdk_nvmf_tcp_destroy,
2909 
2910 	.listen = spdk_nvmf_tcp_listen,
2911 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2912 	.accept = spdk_nvmf_tcp_accept,
2913 
2914 	.listener_discover = spdk_nvmf_tcp_discover,
2915 
2916 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2917 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2918 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2919 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2920 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2921 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2922 
2923 	.req_free = spdk_nvmf_tcp_req_free,
2924 	.req_complete = spdk_nvmf_tcp_req_complete,
2925 
2926 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2927 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2928 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2929 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2930 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2931 };
2932 
2933 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2934