xref: /spdk/lib/nvmf/tcp.c (revision dc29e75b1c287e6ba6bcf207e0e01d06f489b1ae)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/assert.h"
50 #include "spdk_internal/log.h"
51 #include "spdk_internal/nvme_tcp.h"
52 
53 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
54 
55 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
56 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
57 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximal c2h_data pdu number for ecah tqpair */
58 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
59 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
60 
61 /* spdk nvmf related structure */
62 enum spdk_nvmf_tcp_req_state {
63 
64 	/* The request is not currently in use */
65 	TCP_REQUEST_STATE_FREE = 0,
66 
67 	/* Initial state when request first received */
68 	TCP_REQUEST_STATE_NEW,
69 
70 	/* The request is queued until a data buffer is available. */
71 	TCP_REQUEST_STATE_NEED_BUFFER,
72 
73 	/* The request is currently transferring data from the host to the controller. */
74 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
75 
76 	/* The request is ready to execute at the block device */
77 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
78 
79 	/* The request is currently executing at the block device */
80 	TCP_REQUEST_STATE_EXECUTING,
81 
82 	/* The request finished executing at the block device */
83 	TCP_REQUEST_STATE_EXECUTED,
84 
85 	/* The request is ready to send a completion */
86 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
87 
88 	/* The request is currently transferring final pdus from the controller to the host. */
89 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
90 
91 	/* The request completed and can be marked free. */
92 	TCP_REQUEST_STATE_COMPLETED,
93 
94 	/* Terminator */
95 	TCP_REQUEST_NUM_STATES,
96 };
97 
98 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
99 	"Invalid PDU Header Field",
100 	"PDU Sequence Error",
101 	"Header Digiest Error",
102 	"Data Transfer Out of Range",
103 	"R2T Limit Exceeded",
104 	"Unsupported parameter",
105 };
106 
107 #define OBJECT_NVMF_TCP_IO				0x80
108 
109 #define TRACE_GROUP_NVMF_TCP				0x5
110 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
111 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
112 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
113 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
114 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
115 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
116 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
117 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
118 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
119 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
120 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
121 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
122 
123 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
124 {
125 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
126 	spdk_trace_register_description("TCP_REQ_NEW",
127 					TRACE_TCP_REQUEST_STATE_NEW,
128 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
129 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
130 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
131 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
132 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
133 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
134 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
135 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
136 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
137 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
138 	spdk_trace_register_description("TCP_REQ_EXECUTING",
139 					TRACE_TCP_REQUEST_STATE_EXECUTING,
140 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
141 	spdk_trace_register_description("TCP_REQ_EXECUTED",
142 					TRACE_TCP_REQUEST_STATE_EXECUTED,
143 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
144 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
145 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
146 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
147 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
148 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
149 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
150 	spdk_trace_register_description("TCP_REQ_COMPLETED",
151 					TRACE_TCP_REQUEST_STATE_COMPLETED,
152 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
153 	spdk_trace_register_description("TCP_WRITE_START",
154 					TRACE_TCP_FLUSH_WRITEBUF_START,
155 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
156 	spdk_trace_register_description("TCP_WRITE_DONE",
157 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
158 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
159 	spdk_trace_register_description("TCP_READ_DONE",
160 					TRACE_TCP_READ_FROM_SOCKET_DONE,
161 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
162 }
163 
164 struct spdk_nvmf_tcp_req  {
165 	struct spdk_nvmf_request		req;
166 	struct spdk_nvme_cpl			rsp;
167 	struct spdk_nvme_cmd			cmd;
168 
169 	/* In-capsule data buffer */
170 	uint8_t					*buf;
171 
172 	bool					has_incapsule_data;
173 
174 	/* transfer_tag */
175 	uint16_t				ttag;
176 
177 	enum spdk_nvmf_tcp_req_state		state;
178 
179 	/*
180 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
181 	 */
182 	uint32_t				next_expected_r2t_offset;
183 	uint32_t				r2tl_remain;
184 
185 	/*
186 	 * c2h_data_offset is used when we send the c2h_data PDU.
187 	 */
188 	uint32_t				c2h_data_offset;
189 	uint32_t				c2h_data_pdu_num;
190 
191 	STAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
192 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
193 };
194 
195 struct nvme_tcp_pdu_recv_buf {
196 	char					*buf;
197 	uint32_t				off;
198 	uint32_t				size;
199 	uint32_t				remain_size;
200 };
201 
202 struct spdk_nvmf_tcp_qpair {
203 	struct spdk_nvmf_qpair			qpair;
204 	struct spdk_nvmf_tcp_poll_group		*group;
205 	struct spdk_nvmf_tcp_port		*port;
206 	struct spdk_sock			*sock;
207 	struct spdk_poller			*flush_poller;
208 
209 	enum nvme_tcp_pdu_recv_state		recv_state;
210 	enum nvme_tcp_qpair_state		state;
211 
212 	struct nvme_tcp_pdu			pdu_in_progress;
213 	struct nvme_tcp_pdu_recv_buf		pdu_recv_buf;
214 
215 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
216 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
217 
218 	struct nvme_tcp_pdu			*pdu;
219 	struct nvme_tcp_pdu			*pdu_pool;
220 	uint16_t				free_pdu_num;
221 
222 	/* Queues to track the requests in all states */
223 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
224 	/* Number of requests in each state */
225 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
226 
227 	STAILQ_HEAD(, spdk_nvmf_tcp_req)	queued_c2h_data_tcp_req;
228 
229 	uint8_t					cpda;
230 
231 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
232 	 * buffers to be used for in capsule data.
233 	 */
234 	void					*buf;
235 	void					*bufs;
236 	struct spdk_nvmf_tcp_req		*req;
237 	struct spdk_nvmf_tcp_req		*reqs;
238 
239 	bool					host_hdgst_enable;
240 	bool					host_ddgst_enable;
241 
242 
243 	/* The maximum number of I/O outstanding on this connection at one time */
244 	uint16_t				max_queue_depth;
245 
246 
247 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
248 	uint32_t				maxh2cdata;
249 
250 	uint32_t				c2h_data_pdu_cnt;
251 
252 	/* IP address */
253 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
254 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
255 
256 	/* IP port */
257 	uint16_t				initiator_port;
258 	uint16_t				target_port;
259 
260 	/* Timer used to destroy qpair after detecting transport error issue if initiator does
261 	 *  not close the connection.
262 	 */
263 	struct spdk_poller			*timeout_poller;
264 
265 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
266 };
267 
268 struct spdk_nvmf_tcp_poll_group {
269 	struct spdk_nvmf_transport_poll_group	group;
270 	struct spdk_sock_group			*sock_group;
271 
272 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
273 };
274 
275 struct spdk_nvmf_tcp_port {
276 	struct spdk_nvme_transport_id		trid;
277 	struct spdk_sock			*listen_sock;
278 	uint32_t				ref;
279 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
280 };
281 
282 struct spdk_nvmf_tcp_transport {
283 	struct spdk_nvmf_transport		transport;
284 
285 	pthread_mutex_t				lock;
286 
287 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
288 };
289 
290 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
291 				      struct spdk_nvmf_tcp_req *tcp_req);
292 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
293 
294 static void
295 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
296 			    enum spdk_nvmf_tcp_req_state state)
297 {
298 	struct spdk_nvmf_qpair *qpair;
299 	struct spdk_nvmf_tcp_qpair *tqpair;
300 
301 	qpair = tcp_req->req.qpair;
302 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
303 
304 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
305 	tqpair->state_cntr[tcp_req->state]--;
306 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
307 
308 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
309 	tqpair->state_cntr[state]++;
310 
311 	tcp_req->state = state;
312 }
313 
314 static struct nvme_tcp_pdu *
315 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
316 {
317 	struct nvme_tcp_pdu *pdu;
318 
319 	pdu = TAILQ_FIRST(&tqpair->free_queue);
320 	if (!pdu) {
321 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
322 		abort();
323 		return NULL;
324 	}
325 
326 	tqpair->free_pdu_num--;
327 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
328 	memset(pdu, 0, sizeof(*pdu));
329 	pdu->qpair = tqpair;
330 	pdu->ref = 1;
331 	pdu->hdr = &pdu->hdr_mem;
332 
333 	return pdu;
334 }
335 
336 static void
337 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
338 {
339 	if (!pdu) {
340 		return;
341 	}
342 
343 	assert(pdu->ref > 0);
344 
345 	pdu->ref--;
346 	if (pdu->ref == 0) {
347 		tqpair->free_pdu_num++;
348 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
349 	}
350 }
351 
352 static struct spdk_nvmf_tcp_req *
353 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
354 {
355 	struct spdk_nvmf_tcp_req *tcp_req;
356 
357 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
358 	if (!tcp_req) {
359 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
360 		return NULL;
361 	}
362 
363 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
364 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
365 	tcp_req->next_expected_r2t_offset = 0;
366 	tcp_req->r2tl_remain = 0;
367 	tcp_req->c2h_data_offset = 0;
368 	tcp_req->has_incapsule_data = false;
369 	memset(&tcp_req->req.dif, 0, sizeof(tcp_req->req.dif));
370 
371 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
372 	return tcp_req;
373 }
374 
375 static void
376 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
377 {
378 	struct spdk_nvmf_tcp_transport *ttransport;
379 
380 	if (!tcp_req) {
381 		return;
382 	}
383 
384 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
385 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
386 				      struct spdk_nvmf_tcp_transport, transport);
387 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
388 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
389 }
390 
391 static int
392 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
393 {
394 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
395 
396 	nvmf_tcp_request_free(tcp_req);
397 
398 	return 0;
399 }
400 
401 static void
402 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
403 				enum spdk_nvmf_tcp_req_state state)
404 {
405 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
406 
407 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
408 		nvmf_tcp_request_free(tcp_req);
409 	}
410 }
411 
412 static void
413 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
414 {
415 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
416 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
417 
418 	/* Free the pdus in the send_queue */
419 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
420 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
421 		/* Also check the pdu type, we need to calculte the c2h_data_pdu_cnt later */
422 		if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
423 			assert(tqpair->c2h_data_pdu_cnt > 0);
424 			tqpair->c2h_data_pdu_cnt--;
425 		}
426 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
427 	}
428 
429 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req)) {
430 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
431 	}
432 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
433 
434 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
435 
436 	/* Wipe the requests waiting for buffer from the global list */
437 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
438 			   req_tmp) {
439 		STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
440 			      spdk_nvmf_request, buf_link);
441 	}
442 
443 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
444 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
445 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
446 }
447 
448 static void
449 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
450 {
451 	int i;
452 	struct spdk_nvmf_tcp_req *tcp_req;
453 
454 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
455 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
456 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
457 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
458 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
459 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
460 		}
461 	}
462 }
463 
464 static void
465 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
466 {
467 	int err = 0;
468 
469 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
470 
471 	spdk_poller_unregister(&tqpair->flush_poller);
472 	spdk_sock_close(&tqpair->sock);
473 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
474 
475 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
476 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
477 			    tqpair->free_pdu_num,
478 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
479 		err++;
480 	}
481 
482 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
483 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
484 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
485 			    tqpair->max_queue_depth);
486 		err++;
487 	}
488 
489 	if (tqpair->c2h_data_pdu_cnt != 0) {
490 		SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair,
491 			    tqpair->c2h_data_pdu_cnt);
492 		err++;
493 	}
494 
495 	if (err > 0) {
496 		nvmf_tcp_dump_qpair_req_contents(tqpair);
497 	}
498 	free(tqpair->pdu);
499 	free(tqpair->pdu_pool);
500 	free(tqpair->req);
501 	free(tqpair->reqs);
502 	spdk_free(tqpair->buf);
503 	spdk_free(tqpair->bufs);
504 	free(tqpair->pdu_recv_buf.buf);
505 	free(tqpair);
506 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
507 }
508 
509 static int
510 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
511 {
512 	struct spdk_nvmf_tcp_transport	*ttransport;
513 
514 	assert(transport != NULL);
515 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
516 
517 	pthread_mutex_destroy(&ttransport->lock);
518 	free(ttransport);
519 	return 0;
520 }
521 
522 static struct spdk_nvmf_transport *
523 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
524 {
525 	struct spdk_nvmf_tcp_transport *ttransport;
526 	uint32_t sge_count;
527 	uint32_t min_shared_buffers;
528 
529 	ttransport = calloc(1, sizeof(*ttransport));
530 	if (!ttransport) {
531 		return NULL;
532 	}
533 
534 	TAILQ_INIT(&ttransport->ports);
535 
536 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
537 
538 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
539 
540 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
541 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
542 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
543 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
544 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
545 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
546 		     opts->max_queue_depth,
547 		     opts->max_io_size,
548 		     opts->max_qpairs_per_ctrlr,
549 		     opts->io_unit_size,
550 		     opts->in_capsule_data_size,
551 		     opts->max_aq_depth,
552 		     opts->num_shared_buffers,
553 		     opts->c2h_success,
554 		     opts->dif_insert_or_strip,
555 		     opts->sock_priority);
556 
557 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
558 		SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n"
559 			    "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n",
560 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
561 		free(ttransport);
562 		return NULL;
563 	}
564 
565 	/* I/O unit size cannot be larger than max I/O size */
566 	if (opts->io_unit_size > opts->max_io_size) {
567 		opts->io_unit_size = opts->max_io_size;
568 	}
569 
570 	sge_count = opts->max_io_size / opts->io_unit_size;
571 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
572 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
573 		free(ttransport);
574 		return NULL;
575 	}
576 
577 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
578 	if (min_shared_buffers > opts->num_shared_buffers) {
579 		SPDK_ERRLOG("There are not enough buffers to satisfy"
580 			    "per-poll group caches for each thread. (%" PRIu32 ")"
581 			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
582 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
583 		spdk_nvmf_tcp_destroy(&ttransport->transport);
584 		return NULL;
585 	}
586 
587 	pthread_mutex_init(&ttransport->lock, NULL);
588 
589 	return &ttransport->transport;
590 }
591 
592 static int
593 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
594 {
595 	unsigned long long ull;
596 	char *end = NULL;
597 
598 	ull = strtoull(trsvcid, &end, 10);
599 	if (end == NULL || end == trsvcid || *end != '\0') {
600 		return -1;
601 	}
602 
603 	/* Valid TCP/IP port numbers are in [0, 65535] */
604 	if (ull > 65535) {
605 		return -1;
606 	}
607 
608 	return (int)ull;
609 }
610 
611 /**
612  * Canonicalize a listen address trid.
613  */
614 static int
615 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
616 				 const struct spdk_nvme_transport_id *trid)
617 {
618 	int trsvcid_int;
619 
620 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
621 	if (trsvcid_int < 0) {
622 		return -EINVAL;
623 	}
624 
625 	memset(canon_trid, 0, sizeof(*canon_trid));
626 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
627 	canon_trid->adrfam = trid->adrfam;
628 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
629 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
630 
631 	return 0;
632 }
633 
634 /**
635  * Find an existing listening port.
636  *
637  * Caller must hold ttransport->lock.
638  */
639 static struct spdk_nvmf_tcp_port *
640 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
641 			 const struct spdk_nvme_transport_id *trid)
642 {
643 	struct spdk_nvme_transport_id canon_trid;
644 	struct spdk_nvmf_tcp_port *port;
645 
646 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
647 		return NULL;
648 	}
649 
650 	TAILQ_FOREACH(port, &ttransport->ports, link) {
651 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
652 			return port;
653 		}
654 	}
655 
656 	return NULL;
657 }
658 
659 static int
660 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
661 		     const struct spdk_nvme_transport_id *trid)
662 {
663 	struct spdk_nvmf_tcp_transport *ttransport;
664 	struct spdk_nvmf_tcp_port *port;
665 	int trsvcid_int;
666 	uint8_t adrfam;
667 
668 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
669 
670 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
671 	if (trsvcid_int < 0) {
672 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
673 		return -EINVAL;
674 	}
675 
676 	pthread_mutex_lock(&ttransport->lock);
677 
678 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
679 	if (port) {
680 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
681 			      trid->traddr, trid->trsvcid);
682 		port->ref++;
683 		pthread_mutex_unlock(&ttransport->lock);
684 		return 0;
685 	}
686 
687 	port = calloc(1, sizeof(*port));
688 	if (!port) {
689 		SPDK_ERRLOG("Port allocation failed\n");
690 		free(port);
691 		pthread_mutex_unlock(&ttransport->lock);
692 		return -ENOMEM;
693 	}
694 
695 	port->ref = 1;
696 
697 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
698 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
699 			    trid->traddr, trid->trsvcid);
700 		free(port);
701 		pthread_mutex_unlock(&ttransport->lock);
702 		return -ENOMEM;
703 	}
704 
705 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
706 	if (port->listen_sock == NULL) {
707 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
708 			    trid->traddr, trsvcid_int,
709 			    spdk_strerror(errno), errno);
710 		free(port);
711 		pthread_mutex_unlock(&ttransport->lock);
712 		return -errno;
713 	}
714 
715 	if (spdk_sock_is_ipv4(port->listen_sock)) {
716 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
717 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
718 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
719 	} else {
720 		SPDK_ERRLOG("Unhandled socket type\n");
721 		adrfam = 0;
722 	}
723 
724 	if (adrfam != trid->adrfam) {
725 		SPDK_ERRLOG("Socket address family mismatch\n");
726 		spdk_sock_close(&port->listen_sock);
727 		free(port);
728 		pthread_mutex_unlock(&ttransport->lock);
729 		return -EINVAL;
730 	}
731 
732 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
733 		       trid->traddr, trsvcid_int);
734 
735 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
736 	pthread_mutex_unlock(&ttransport->lock);
737 
738 	return 0;
739 }
740 
741 static int
742 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
743 			  const struct spdk_nvme_transport_id *trid)
744 {
745 	struct spdk_nvmf_tcp_transport *ttransport;
746 	struct spdk_nvmf_tcp_port *port;
747 	int rc;
748 
749 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
750 
751 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
752 		      trid->traddr, trid->trsvcid);
753 
754 	pthread_mutex_lock(&ttransport->lock);
755 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
756 	if (port) {
757 		assert(port->ref > 0);
758 		port->ref--;
759 		if (port->ref == 0) {
760 			TAILQ_REMOVE(&ttransport->ports, port, link);
761 			spdk_sock_close(&port->listen_sock);
762 			free(port);
763 		}
764 		rc = 0;
765 	} else {
766 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
767 		rc = -ENOENT;
768 	}
769 	pthread_mutex_unlock(&ttransport->lock);
770 
771 	return rc;
772 }
773 
774 static int
775 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
776 {
777 	const int array_size = 32;
778 	struct iovec iovs[array_size];
779 	int iovcnt = 0;
780 	int bytes = 0;
781 	int total_length = 0;
782 	uint32_t mapped_length = 0;
783 	struct nvme_tcp_pdu *pdu;
784 	int pdu_length;
785 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
786 
787 	pdu = TAILQ_FIRST(&tqpair->send_queue);
788 
789 	if (pdu == NULL) {
790 		return 0;
791 	}
792 
793 	/*
794 	 * Build up a list of iovecs for the first few PDUs in the
795 	 *  tqpair 's send_queue.
796 	 */
797 	while (pdu != NULL && ((array_size - iovcnt) >= (2 + (int)pdu->data_iovcnt))) {
798 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
799 					      array_size - iovcnt,
800 					      pdu,
801 					      tqpair->host_hdgst_enable,
802 					      tqpair->host_ddgst_enable,
803 					      &mapped_length);
804 		total_length += mapped_length;
805 		pdu = TAILQ_NEXT(pdu, tailq);
806 	}
807 
808 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
809 
810 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
811 	if (bytes == -1) {
812 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
813 			return 1;
814 		} else {
815 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
816 				    errno, spdk_strerror(errno));
817 			return -1;
818 		}
819 	}
820 
821 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
822 
823 	pdu = TAILQ_FIRST(&tqpair->send_queue);
824 
825 	/*
826 	 * Free any PDUs that were fully written.  If a PDU was only
827 	 *  partially written, update its writev_offset so that next
828 	 *  time only the unwritten portion will be sent to writev().
829 	 */
830 	TAILQ_INIT(&completed_pdus_list);
831 	while (bytes > 0) {
832 		pdu_length = pdu->hdr->common.plen - pdu->writev_offset;
833 		if (bytes >= pdu_length) {
834 			bytes -= pdu_length;
835 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
836 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
837 			pdu = TAILQ_FIRST(&tqpair->send_queue);
838 
839 		} else {
840 			pdu->writev_offset += bytes;
841 			bytes = 0;
842 		}
843 	}
844 
845 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
846 		pdu = TAILQ_FIRST(&completed_pdus_list);
847 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
848 		assert(pdu->cb_fn != NULL);
849 		pdu->cb_fn(pdu->cb_arg);
850 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
851 	}
852 
853 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
854 }
855 
856 static int
857 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
858 {
859 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
860 	int rc;
861 
862 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
863 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
864 		if (rc == 0 && tqpair->flush_poller != NULL) {
865 			spdk_poller_unregister(&tqpair->flush_poller);
866 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
867 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
868 					       tqpair, 50);
869 		}
870 	} else {
871 		/*
872 		 * If the tqpair state is not RUNNING, then
873 		 * keep trying to flush PDUs until our list is
874 		 * empty - to make sure all data is sent before
875 		 * closing the connection.
876 		 */
877 		do {
878 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
879 		} while (rc == 1);
880 	}
881 
882 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
883 		/*
884 		 * If the poller has already started destruction of the tqpair,
885 		 *  i.e. the socket read failed, then the connection state may already
886 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
887 		 */
888 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
889 	}
890 
891 	return -1;
892 }
893 
894 static void
895 spdk_nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
896 {
897 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Disconnecting qpair %p\n", tqpair);
898 
899 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
900 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
901 	spdk_poller_unregister(&tqpair->timeout_poller);
902 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
903 }
904 
905 static void
906 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
907 			      struct nvme_tcp_pdu *pdu,
908 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
909 			      void *cb_arg)
910 {
911 	int enable_digest;
912 	int hlen;
913 	uint32_t crc32c;
914 
915 	hlen = pdu->hdr->common.hlen;
916 	enable_digest = 1;
917 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
918 	    pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
919 		/* this PDU should be sent without digest */
920 		enable_digest = 0;
921 	}
922 
923 	/* Header Digest */
924 	if (enable_digest && tqpair->host_hdgst_enable) {
925 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
926 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr->raw + hlen, crc32c);
927 	}
928 
929 	/* Data Digest */
930 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
931 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
932 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
933 	}
934 
935 	pdu->cb_fn = cb_fn;
936 	pdu->cb_arg = cb_arg;
937 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
938 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
939 }
940 
941 static int
942 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
943 {
944 	int i;
945 	struct spdk_nvmf_tcp_req *tcp_req;
946 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
947 	uint32_t in_capsule_data_size;
948 
949 	in_capsule_data_size = transport->opts.in_capsule_data_size;
950 	if (transport->opts.dif_insert_or_strip) {
951 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
952 	}
953 
954 	if (!tqpair->qpair.sq_head_max) {
955 		tqpair->req = calloc(1, sizeof(*tqpair->req));
956 		if (!tqpair->req) {
957 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
958 			return -1;
959 		}
960 
961 		if (in_capsule_data_size) {
962 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
963 						   NULL, SPDK_ENV_LCORE_ID_ANY,
964 						   SPDK_MALLOC_DMA);
965 			if (!tqpair->buf) {
966 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
967 				return -1;
968 			}
969 		}
970 
971 		tcp_req = tqpair->req;
972 		tcp_req->ttag = 0;
973 		tcp_req->req.qpair = &tqpair->qpair;
974 
975 		/* Set up memory to receive commands */
976 		if (tqpair->buf) {
977 			tcp_req->buf = tqpair->buf;
978 		}
979 
980 		/* Set the cmdn and rsp */
981 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
982 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
983 
984 		/* Initialize request state to FREE */
985 		tcp_req->state = TCP_REQUEST_STATE_FREE;
986 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
987 
988 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
989 		if (!tqpair->pdu) {
990 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
991 			return -1;
992 		}
993 
994 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
995 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
996 		}
997 
998 		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
999 					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1000 		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
1001 		if (!tqpair->pdu_recv_buf.buf) {
1002 			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
1003 				    tqpair->pdu_recv_buf.size);
1004 			return -1;
1005 		}
1006 		tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
1007 	} else {
1008 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
1009 		if (!tqpair->reqs) {
1010 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
1011 			return -1;
1012 		}
1013 
1014 		if (in_capsule_data_size) {
1015 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
1016 						    NULL, SPDK_ENV_LCORE_ID_ANY,
1017 						    SPDK_MALLOC_DMA);
1018 			if (!tqpair->bufs) {
1019 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1020 				return -1;
1021 			}
1022 		}
1023 
1024 		for (i = 0; i < size; i++) {
1025 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1026 
1027 			tcp_req->ttag = i + 1;
1028 			tcp_req->req.qpair = &tqpair->qpair;
1029 
1030 			/* Set up memory to receive commands */
1031 			if (tqpair->bufs) {
1032 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1033 			}
1034 
1035 			/* Set the cmdn and rsp */
1036 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1037 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1038 
1039 			/* Initialize request state to FREE */
1040 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1041 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1042 		}
1043 
1044 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1045 		if (!tqpair->pdu_pool) {
1046 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
1047 			return -1;
1048 		}
1049 
1050 		for (i = 0; i < size; i++) {
1051 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1052 		}
1053 	}
1054 
1055 	return 0;
1056 }
1057 
1058 static int
1059 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1060 {
1061 	struct spdk_nvmf_tcp_qpair *tqpair;
1062 	int i;
1063 
1064 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1065 
1066 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1067 
1068 	TAILQ_INIT(&tqpair->send_queue);
1069 	TAILQ_INIT(&tqpair->free_queue);
1070 	STAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1071 
1072 	/* Initialise request state queues of the qpair */
1073 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1074 		TAILQ_INIT(&tqpair->state_queue[i]);
1075 	}
1076 
1077 	tqpair->host_hdgst_enable = true;
1078 	tqpair->host_ddgst_enable = true;
1079 	return 0;
1080 }
1081 
1082 static int
1083 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1084 {
1085 	int rc;
1086 
1087 	/* set low water mark */
1088 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1089 	if (rc != 0) {
1090 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1091 		return rc;
1092 	}
1093 
1094 	return 0;
1095 }
1096 
1097 static void
1098 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1099 			      struct spdk_nvmf_tcp_port *port,
1100 			      struct spdk_sock *sock,
1101 			      new_qpair_fn cb_fn, void *cb_arg)
1102 {
1103 	struct spdk_nvmf_tcp_qpair *tqpair;
1104 	int rc;
1105 
1106 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1107 		      port->trid.traddr, port->trid.trsvcid);
1108 
1109 	if (transport->opts.sock_priority) {
1110 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1111 		if (rc) {
1112 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1113 			spdk_sock_close(&sock);
1114 			return;
1115 		}
1116 	}
1117 
1118 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1119 	if (tqpair == NULL) {
1120 		SPDK_ERRLOG("Could not allocate new connection.\n");
1121 		spdk_sock_close(&sock);
1122 		return;
1123 	}
1124 
1125 	tqpair->sock = sock;
1126 	tqpair->max_queue_depth = 1;
1127 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1128 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1129 	tqpair->port = port;
1130 	tqpair->qpair.transport = transport;
1131 
1132 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1133 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1134 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1135 			       &tqpair->initiator_port);
1136 	if (rc < 0) {
1137 		SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair);
1138 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1139 		return;
1140 	}
1141 
1142 	cb_fn(&tqpair->qpair, cb_arg);
1143 }
1144 
1145 static void
1146 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1147 			  new_qpair_fn cb_fn, void *cb_arg)
1148 {
1149 	struct spdk_sock *sock;
1150 	int i;
1151 
1152 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1153 		sock = spdk_sock_accept(port->listen_sock);
1154 		if (sock == NULL) {
1155 			break;
1156 		}
1157 		_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn, cb_arg);
1158 	}
1159 }
1160 
1161 static void
1162 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn, void *cb_arg)
1163 {
1164 	struct spdk_nvmf_tcp_transport *ttransport;
1165 	struct spdk_nvmf_tcp_port *port;
1166 
1167 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1168 
1169 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1170 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn, cb_arg);
1171 	}
1172 }
1173 
1174 static void
1175 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1176 		       struct spdk_nvme_transport_id *trid,
1177 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1178 {
1179 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1180 	entry->adrfam = trid->adrfam;
1181 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
1182 
1183 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1184 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1185 
1186 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1187 }
1188 
1189 static struct spdk_nvmf_transport_poll_group *
1190 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1191 {
1192 	struct spdk_nvmf_tcp_poll_group *tgroup;
1193 
1194 	tgroup = calloc(1, sizeof(*tgroup));
1195 	if (!tgroup) {
1196 		return NULL;
1197 	}
1198 
1199 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1200 	if (!tgroup->sock_group) {
1201 		goto cleanup;
1202 	}
1203 
1204 	TAILQ_INIT(&tgroup->qpairs);
1205 
1206 	return &tgroup->group;
1207 
1208 cleanup:
1209 	free(tgroup);
1210 	return NULL;
1211 }
1212 
1213 static struct spdk_nvmf_transport_poll_group *
1214 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1215 {
1216 	struct spdk_nvmf_tcp_qpair *tqpair;
1217 	struct spdk_sock_group *group = NULL;
1218 	int rc;
1219 
1220 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1221 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1222 	if (!rc && group != NULL) {
1223 		return spdk_sock_group_get_ctx(group);
1224 	}
1225 
1226 	return NULL;
1227 }
1228 
1229 static void
1230 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1231 {
1232 	struct spdk_nvmf_tcp_poll_group *tgroup;
1233 
1234 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1235 	spdk_sock_group_close(&tgroup->sock_group);
1236 
1237 	free(tgroup);
1238 }
1239 
1240 static inline void
1241 spdk_nvmf_tcp_reset_pdu_in_process(struct spdk_nvmf_tcp_qpair *tqpair)
1242 {
1243 	struct nvme_tcp_pdu_recv_buf *pdu_recv_buf = &tqpair->pdu_recv_buf;
1244 	char *dst, *src;
1245 
1246 	if (spdk_unlikely((pdu_recv_buf->off + sizeof(union nvme_tcp_pdu_hdr)) >
1247 			  pdu_recv_buf->size)) {
1248 		if (pdu_recv_buf->remain_size) {
1249 			dst = pdu_recv_buf->buf;
1250 			src = (char *)((void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1251 
1252 			/* purpose: to avoid overlap copy, so do not use memcpy if there is overlap case */
1253 			memmove(dst, src, pdu_recv_buf->remain_size);
1254 		}
1255 		tqpair->pdu_recv_buf.off = 0;
1256 	} else if (!pdu_recv_buf->remain_size) {
1257 		tqpair->pdu_recv_buf.off = 0;
1258 	}
1259 
1260 	tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)((void *)pdu_recv_buf->buf +
1261 				      pdu_recv_buf->off);
1262 }
1263 
1264 static void
1265 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1266 				   enum nvme_tcp_pdu_recv_state state)
1267 {
1268 	if (tqpair->recv_state == state) {
1269 		SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n",
1270 			    tqpair, state);
1271 		return;
1272 	}
1273 
1274 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1275 	tqpair->recv_state = state;
1276 
1277 	switch (state) {
1278 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1279 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1280 	case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
1281 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1282 		break;
1283 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1284 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1285 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1286 		spdk_nvmf_tcp_reset_pdu_in_process(tqpair);
1287 		break;
1288 	default:
1289 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1290 		abort();
1291 		break;
1292 	}
1293 }
1294 
1295 static int
1296 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1297 {
1298 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1299 
1300 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1301 
1302 	SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair,
1303 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1304 
1305 	spdk_nvmf_tcp_qpair_disconnect(tqpair);
1306 	return 0;
1307 }
1308 
1309 static void
1310 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1311 {
1312 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1313 
1314 	if (!tqpair->timeout_poller) {
1315 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1316 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1317 	}
1318 }
1319 
1320 static void
1321 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1322 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1323 {
1324 	struct nvme_tcp_pdu *rsp_pdu;
1325 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1326 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1327 	uint32_t copy_len;
1328 
1329 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1330 	if (!rsp_pdu) {
1331 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1332 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1333 		return;
1334 	}
1335 
1336 	c2h_term_req = &rsp_pdu->hdr->term_req;
1337 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1338 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1339 
1340 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1341 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1342 		DSET32(&c2h_term_req->fei, error_offset);
1343 	}
1344 
1345 	copy_len = pdu->hdr->common.hlen;
1346 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1347 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1348 	}
1349 
1350 	/* Copy the error info into the buffer */
1351 	memcpy((uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, pdu->hdr->raw, copy_len);
1352 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr->raw + c2h_term_req_hdr_len, copy_len);
1353 
1354 	/* Contain the header of the wrong received pdu */
1355 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1356 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1357 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1358 }
1359 
1360 static void
1361 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1362 				     struct spdk_nvmf_tcp_qpair *tqpair,
1363 				     struct nvme_tcp_pdu *pdu)
1364 {
1365 	struct spdk_nvmf_tcp_req *tcp_req;
1366 
1367 	assert(pdu->psh_valid_bytes == pdu->psh_len);
1368 	assert(pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD);
1369 
1370 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1371 	if (!tcp_req) {
1372 		/* Directly return and make the allocation retry again */
1373 		if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0) {
1374 			return;
1375 		}
1376 
1377 		/* The host sent more commands than the maximum queue depth. */
1378 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1379 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1380 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1381 		return;
1382 	}
1383 
1384 	pdu->req = tcp_req;
1385 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1386 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1387 	return;
1388 }
1389 
1390 static void
1391 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1392 		struct spdk_nvmf_tcp_qpair *tqpair,
1393 		struct nvme_tcp_pdu *pdu)
1394 {
1395 	struct spdk_nvmf_tcp_req *tcp_req;
1396 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1397 	uint32_t error_offset = 0;
1398 	enum spdk_nvme_tcp_term_req_fes fes;
1399 
1400 	capsule_cmd = &pdu->hdr->capsule_cmd;
1401 	tcp_req = pdu->req;
1402 	assert(tcp_req != NULL);
1403 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1404 		SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n",
1405 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1406 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1407 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1408 		goto err;
1409 	}
1410 
1411 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1412 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1413 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1414 
1415 	return;
1416 err:
1417 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1418 }
1419 
1420 static void
1421 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1422 				  struct spdk_nvmf_tcp_qpair *tqpair,
1423 				  struct nvme_tcp_pdu *pdu)
1424 {
1425 	struct spdk_nvmf_tcp_req *tcp_req;
1426 	uint32_t error_offset = 0;
1427 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1428 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1429 	bool ttag_offset_error = false;
1430 
1431 	h2c_data = &pdu->hdr->h2c_data;
1432 
1433 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1434 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1435 
1436 	/* According to the information in the pdu to find the req */
1437 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1438 		      state_link) {
1439 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1440 			break;
1441 		}
1442 
1443 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1444 			ttag_offset_error = true;
1445 		}
1446 	}
1447 
1448 	if (!tcp_req) {
1449 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1450 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1451 		if (!ttag_offset_error) {
1452 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1453 		} else {
1454 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1455 		}
1456 		goto err;
1457 	}
1458 
1459 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1460 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1461 			      "tcp_req(%p), tqpair=%p,  expected_r2t_offset=%u, but data offset =%u\n",
1462 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1463 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1464 		goto err;
1465 	}
1466 
1467 	if (h2c_data->datal > tqpair->maxh2cdata) {
1468 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p,  datao=%u execeeds maxh2cdata size=%u\n",
1469 			      tcp_req, tqpair, h2c_data->datao, tqpair->maxh2cdata);
1470 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1471 		goto err;
1472 	}
1473 
1474 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1475 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1476 			      "tcp_req(%p), tqpair=%p,  (datao=%u + datal=%u) execeeds requested length=%u\n",
1477 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1478 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1479 		goto err;
1480 	}
1481 
1482 	pdu->req = tcp_req;
1483 
1484 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1485 		pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
1486 	}
1487 
1488 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1489 				  h2c_data->datao, h2c_data->datal);
1490 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1491 	return;
1492 
1493 err:
1494 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1495 }
1496 
1497 static void
1498 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1499 {
1500 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1501 	nvmf_tcp_request_free(tcp_req);
1502 }
1503 
1504 static void
1505 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1506 				    struct spdk_nvmf_tcp_qpair *tqpair)
1507 {
1508 	struct nvme_tcp_pdu *rsp_pdu;
1509 	struct spdk_nvme_tcp_rsp *capsule_resp;
1510 
1511 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1512 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1513 	if (!rsp_pdu) {
1514 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1515 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1516 		return;
1517 	}
1518 
1519 	capsule_resp = &rsp_pdu->hdr->capsule_resp;
1520 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1521 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1522 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1523 	if (tqpair->host_hdgst_enable) {
1524 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1525 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1526 	}
1527 
1528 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1529 }
1530 
1531 static void
1532 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1533 {
1534 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1535 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1536 					     struct spdk_nvmf_tcp_qpair, qpair);
1537 
1538 	assert(tqpair != NULL);
1539 	assert(tcp_req->c2h_data_pdu_num > 0);
1540 	tcp_req->c2h_data_pdu_num--;
1541 	if (!tcp_req->c2h_data_pdu_num) {
1542 		if (tqpair->qpair.transport->opts.c2h_success) {
1543 			nvmf_tcp_request_free(tcp_req);
1544 		} else {
1545 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1546 		}
1547 	}
1548 
1549 	tqpair->c2h_data_pdu_cnt--;
1550 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1551 }
1552 
1553 static void
1554 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1555 			   struct spdk_nvmf_tcp_req *tcp_req)
1556 {
1557 	struct nvme_tcp_pdu *rsp_pdu;
1558 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1559 
1560 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1561 	if (!rsp_pdu) {
1562 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1563 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1564 		return;
1565 	}
1566 
1567 	r2t = &rsp_pdu->hdr->r2t;
1568 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1569 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1570 
1571 	if (tqpair->host_hdgst_enable) {
1572 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1573 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1574 	}
1575 
1576 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1577 	r2t->ttag = tcp_req->ttag;
1578 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1579 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1580 	tcp_req->r2tl_remain = r2t->r2tl;
1581 
1582 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1583 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1584 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1585 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1586 }
1587 
1588 static void
1589 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1590 				      struct spdk_nvmf_tcp_qpair *tqpair,
1591 				      struct nvme_tcp_pdu *pdu)
1592 {
1593 	struct spdk_nvmf_tcp_req *tcp_req;
1594 
1595 	tcp_req = pdu->req;
1596 	assert(tcp_req != NULL);
1597 
1598 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1599 
1600 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1601 	tcp_req->r2tl_remain -= pdu->data_len;
1602 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1603 
1604 	if (!tcp_req->r2tl_remain) {
1605 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1606 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1607 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1608 		} else {
1609 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1610 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1611 		}
1612 	}
1613 }
1614 
1615 static void
1616 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1617 {
1618 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1619 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1620 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1621 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1622 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1623 			      DGET32(h2c_term_req->fei));
1624 	}
1625 }
1626 
1627 static void
1628 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1629 				      struct nvme_tcp_pdu *pdu)
1630 {
1631 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1632 	uint32_t error_offset = 0;
1633 	enum spdk_nvme_tcp_term_req_fes fes;
1634 
1635 
1636 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1637 		SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1638 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1639 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1640 		goto end;
1641 	}
1642 
1643 	/* set the data buffer */
1644 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr->raw + h2c_term_req->common.hlen,
1645 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1646 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1647 	return;
1648 end:
1649 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1650 	return;
1651 }
1652 
1653 static void
1654 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1655 		struct nvme_tcp_pdu *pdu)
1656 {
1657 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr->term_req;
1658 
1659 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1660 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1661 	return;
1662 }
1663 
1664 static void
1665 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1666 				 struct spdk_nvmf_tcp_transport *ttransport)
1667 {
1668 	int rc = 0;
1669 	struct nvme_tcp_pdu *pdu;
1670 	uint32_t crc32c, error_offset = 0;
1671 	enum spdk_nvme_tcp_term_req_fes fes;
1672 
1673 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1674 	pdu = &tqpair->pdu_in_progress;
1675 
1676 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1677 	/* check data digest if need */
1678 	if (pdu->ddgst_enable) {
1679 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1680 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1681 		if (rc == 0) {
1682 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1683 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1684 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1685 			return;
1686 
1687 		}
1688 	}
1689 
1690 	switch (pdu->hdr->common.pdu_type) {
1691 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1692 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1693 		break;
1694 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1695 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1696 		break;
1697 
1698 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1699 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1700 		break;
1701 
1702 	default:
1703 		/* The code should not go to here */
1704 		SPDK_ERRLOG("The code should not go to here\n");
1705 		break;
1706 	}
1707 }
1708 
1709 static void
1710 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1711 {
1712 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1713 
1714 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1715 }
1716 
1717 static void
1718 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1719 			   struct spdk_nvmf_tcp_qpair *tqpair,
1720 			   struct nvme_tcp_pdu *pdu)
1721 {
1722 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr->ic_req;
1723 	struct nvme_tcp_pdu *rsp_pdu;
1724 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1725 	uint32_t error_offset = 0;
1726 	enum spdk_nvme_tcp_term_req_fes fes;
1727 
1728 	/* Only PFV 0 is defined currently */
1729 	if (ic_req->pfv != 0) {
1730 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1731 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1732 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1733 		goto end;
1734 	}
1735 
1736 	/* MAXR2T is 0's based */
1737 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1738 
1739 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1740 	if (!tqpair->host_hdgst_enable) {
1741 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1742 	}
1743 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1744 	if (!tqpair->host_ddgst_enable) {
1745 		tqpair->pdu_recv_buf.size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1746 	}
1747 
1748 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1749 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1750 
1751 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1752 	if (!rsp_pdu) {
1753 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1754 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1755 		return;
1756 	}
1757 
1758 	ic_resp = &rsp_pdu->hdr->ic_resp;
1759 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1760 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1761 	ic_resp->pfv = 0;
1762 	ic_resp->cpda = tqpair->cpda;
1763 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1764 				      ttransport->transport.opts.io_unit_size);
1765 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1766 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1767 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1768 
1769 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1770 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1771 
1772 	tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
1773 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1774 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1775 	return;
1776 end:
1777 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1778 	return;
1779 }
1780 
1781 static void
1782 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1783 			     struct spdk_nvmf_tcp_transport *ttransport)
1784 {
1785 	struct nvme_tcp_pdu *pdu;
1786 	int rc;
1787 	uint32_t crc32c, error_offset = 0;
1788 	enum spdk_nvme_tcp_term_req_fes fes;
1789 
1790 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1791 	pdu = &tqpair->pdu_in_progress;
1792 
1793 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1794 		      pdu->hdr->common.pdu_type);
1795 	/* check header digest if needed */
1796 	if (pdu->has_hdgst) {
1797 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1798 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1799 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr->raw + pdu->hdr->common.hlen, crc32c);
1800 		if (rc == 0) {
1801 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1802 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1803 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1804 			return;
1805 
1806 		}
1807 	}
1808 
1809 	switch (pdu->hdr->common.pdu_type) {
1810 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1811 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1812 		break;
1813 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1814 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ);
1815 		break;
1816 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1817 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1818 		break;
1819 
1820 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1821 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1822 		break;
1823 
1824 	default:
1825 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr->common.pdu_type);
1826 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1827 		error_offset = 1;
1828 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1829 		break;
1830 	}
1831 }
1832 
1833 static void
1834 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1835 {
1836 	struct nvme_tcp_pdu *pdu;
1837 	uint32_t error_offset = 0;
1838 	enum spdk_nvme_tcp_term_req_fes fes;
1839 	uint8_t expected_hlen, pdo;
1840 	bool plen_error = false, pdo_error = false;
1841 
1842 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1843 	pdu = &tqpair->pdu_in_progress;
1844 
1845 	if (pdu->hdr->common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1846 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1847 			SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
1848 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1849 			goto err;
1850 		}
1851 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1852 		if (pdu->hdr->common.plen != expected_hlen) {
1853 			plen_error = true;
1854 		}
1855 	} else {
1856 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1857 			SPDK_ERRLOG("The TCP/IP connection is not negotitated\n");
1858 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1859 			goto err;
1860 		}
1861 
1862 		switch (pdu->hdr->common.pdu_type) {
1863 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1864 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1865 			pdo = pdu->hdr->common.pdo;
1866 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1867 				pdo_error = true;
1868 				break;
1869 			}
1870 
1871 			if (pdu->hdr->common.plen < expected_hlen) {
1872 				plen_error = true;
1873 			}
1874 			break;
1875 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1876 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1877 			pdo = pdu->hdr->common.pdo;
1878 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1879 				pdo_error = true;
1880 				break;
1881 			}
1882 			if (pdu->hdr->common.plen < expected_hlen) {
1883 				plen_error = true;
1884 			}
1885 			break;
1886 
1887 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1888 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1889 			if ((pdu->hdr->common.plen <= expected_hlen) ||
1890 			    (pdu->hdr->common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1891 				plen_error = true;
1892 			}
1893 			break;
1894 
1895 		default:
1896 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr->common.pdu_type);
1897 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1898 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1899 			goto err;
1900 		}
1901 	}
1902 
1903 	if (pdu->hdr->common.hlen != expected_hlen) {
1904 		SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n",
1905 			    pdu->hdr->common.pdu_type,
1906 			    expected_hlen, pdu->hdr->common.hlen, tqpair);
1907 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1908 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1909 		goto err;
1910 	} else if (pdo_error) {
1911 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1912 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1913 	} else if (plen_error) {
1914 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1915 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1916 		goto err;
1917 	} else {
1918 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1919 		nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
1920 		return;
1921 	}
1922 err:
1923 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1924 }
1925 
1926 static int
1927 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1928 				int read_len)
1929 {
1930 	int rc;
1931 
1932 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1933 				      read_offset, read_len, pdu->dif_ctx);
1934 	if (rc != 0) {
1935 		SPDK_ERRLOG("DIF generate failed\n");
1936 	}
1937 
1938 	return rc;
1939 }
1940 
1941 static int
1942 nvme_tcp_recv_buf_read(struct spdk_sock *sock, struct nvme_tcp_pdu_recv_buf *pdu_recv_buf)
1943 {
1944 	int rc;
1945 
1946 	rc = nvme_tcp_read_data(sock, pdu_recv_buf->size - pdu_recv_buf->off,
1947 				(void *)pdu_recv_buf->buf + pdu_recv_buf->off);
1948 	if (rc < 0) {
1949 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect sock=%p\n", sock);
1950 	} else if (rc > 0) {
1951 		pdu_recv_buf->remain_size = rc;
1952 		spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1953 	}
1954 
1955 	return rc;
1956 }
1957 
1958 static uint32_t
1959 nvme_tcp_read_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1960 				     uint32_t expected_size,
1961 				     char *dst)
1962 {
1963 	uint32_t size;
1964 
1965 	assert(pdu_recv_buf->remain_size > 0);
1966 	size = spdk_min(expected_size, pdu_recv_buf->remain_size);
1967 	if (dst) {
1968 		memcpy(dst, (void *)pdu_recv_buf->buf + pdu_recv_buf->off, size);
1969 	}
1970 	pdu_recv_buf->off += size;
1971 	pdu_recv_buf->remain_size -= size;
1972 
1973 
1974 	return size;
1975 }
1976 
1977 static int
1978 nvme_tcp_read_payload_data_from_pdu_recv_buf(struct nvme_tcp_pdu_recv_buf *pdu_recv_buf,
1979 		struct nvme_tcp_pdu *pdu)
1980 {
1981 	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
1982 	int iovcnt, i;
1983 	uint32_t size = 0;
1984 	void *dst;
1985 
1986 	assert(pdu_recv_buf->remain_size > 0);
1987 	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
1988 					     pdu->ddgst_enable, NULL);
1989 	assert(iovcnt >= 0);
1990 	for (i = 0; i < iovcnt; i++) {
1991 		if (!pdu_recv_buf->remain_size) {
1992 			break;
1993 		}
1994 
1995 		dst = NULL;
1996 		if (pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) {
1997 			dst = iov[i].iov_base;
1998 		}
1999 		size += nvme_tcp_read_data_from_pdu_recv_buf(pdu_recv_buf, iov[i].iov_len, dst);
2000 	}
2001 
2002 	return size;
2003 }
2004 
2005 static int
2006 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
2007 {
2008 	int rc = 0;
2009 	struct nvme_tcp_pdu *pdu;
2010 	enum nvme_tcp_pdu_recv_state prev_state;
2011 	uint32_t data_len;
2012 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
2013 			struct spdk_nvmf_tcp_transport, transport);
2014 
2015 	/* The loop here is to allow for several back-to-back state changes. */
2016 	do {
2017 		prev_state = tqpair->recv_state;
2018 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
2019 
2020 		pdu = &tqpair->pdu_in_progress;
2021 		switch (tqpair->recv_state) {
2022 		/* Wait for the common header  */
2023 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
2024 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
2025 			if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
2026 				return rc;
2027 			}
2028 
2029 			if (!tqpair->pdu_recv_buf.remain_size) {
2030 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2031 				if (rc <= 0) {
2032 					return rc;
2033 				}
2034 			}
2035 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2036 					sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
2037 					NULL);
2038 			pdu->ch_valid_bytes += rc;
2039 			if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
2040 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2041 			}
2042 
2043 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
2044 				return NVME_TCP_PDU_IN_PROGRESS;
2045 			}
2046 
2047 			/* The command header of this PDU has now been read from the socket. */
2048 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
2049 			break;
2050 		/* Wait for the pdu specific header  */
2051 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2052 			if (!tqpair->pdu_recv_buf.remain_size) {
2053 				rc = nvme_tcp_recv_buf_read(tqpair->sock, &tqpair->pdu_recv_buf);
2054 				if (rc <= 0) {
2055 					return rc;
2056 				}
2057 			}
2058 
2059 			rc = nvme_tcp_read_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf,
2060 					pdu->psh_len - pdu->psh_valid_bytes,
2061 					NULL);
2062 			pdu->psh_valid_bytes += rc;
2063 			if (pdu->psh_valid_bytes < pdu->psh_len) {
2064 				return NVME_TCP_PDU_IN_PROGRESS;
2065 			}
2066 
2067 			/* All header(ch, psh, head digist) of this PDU has now been read from the socket. */
2068 			spdk_nvmf_tcp_pdu_psh_handle(tqpair, ttransport);
2069 			break;
2070 		/* Wait for the req slot */
2071 		case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
2072 			spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
2073 			break;
2074 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2075 			/* check whether the data is valid, if not we just return */
2076 			if (!pdu->data_len) {
2077 				return NVME_TCP_PDU_IN_PROGRESS;
2078 			}
2079 
2080 			data_len = pdu->data_len;
2081 			/* data digest */
2082 			if (spdk_unlikely((pdu->hdr->common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2083 					  tqpair->host_ddgst_enable)) {
2084 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
2085 				pdu->ddgst_enable = true;
2086 			}
2087 
2088 			if (tqpair->pdu_recv_buf.remain_size) {
2089 				rc = nvme_tcp_read_payload_data_from_pdu_recv_buf(&tqpair->pdu_recv_buf, pdu);
2090 				pdu->readv_offset += rc;
2091 			}
2092 
2093 			if (pdu->readv_offset < data_len) {
2094 				rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2095 				if (rc < 0) {
2096 					return NVME_TCP_PDU_IN_PROGRESS;
2097 				}
2098 				pdu->readv_offset += rc;
2099 			}
2100 
2101 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
2102 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
2103 				if (rc != 0) {
2104 					return NVME_TCP_PDU_FATAL;
2105 				}
2106 			}
2107 
2108 			if (pdu->readv_offset < data_len) {
2109 				return NVME_TCP_PDU_IN_PROGRESS;
2110 			}
2111 
2112 			/* All of this PDU has now been read from the socket. */
2113 			spdk_nvmf_tcp_pdu_payload_handle(tqpair, ttransport);
2114 			break;
2115 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2116 			if (!spdk_sock_is_connected(tqpair->sock)) {
2117 				return NVME_TCP_PDU_FATAL;
2118 			}
2119 			break;
2120 		default:
2121 			assert(0);
2122 			SPDK_ERRLOG("code should not come to here");
2123 			break;
2124 		}
2125 	} while (tqpair->recv_state != prev_state);
2126 
2127 	return rc;
2128 }
2129 
2130 static enum spdk_nvme_data_transfer
2131 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2132 	enum spdk_nvme_data_transfer xfer;
2133 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2134 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2135 
2136 	/* Figure out data transfer direction */
2137 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2138 	{
2139 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2140 	} else
2141 	{
2142 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2143 
2144 		/* Some admin commands are special cases */
2145 		if ((tcp_req->req.qpair->qid == 0) &&
2146 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2147 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2148 			switch (cmd->cdw10 & 0xff) {
2149 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2150 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2151 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2152 				break;
2153 			default:
2154 				xfer = SPDK_NVME_DATA_NONE;
2155 			}
2156 		}
2157 	}
2158 
2159 	if (xfer == SPDK_NVME_DATA_NONE)
2160 	{
2161 		return xfer;
2162 	}
2163 
2164 	/* Even for commands that may transfer data, they could have specified 0 length.
2165 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2166 	 */
2167 	switch (sgl->generic.type)
2168 	{
2169 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2170 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2171 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2172 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2173 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2174 		if (sgl->unkeyed.length == 0) {
2175 			xfer = SPDK_NVME_DATA_NONE;
2176 		}
2177 		break;
2178 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2179 		if (sgl->keyed.length == 0) {
2180 			xfer = SPDK_NVME_DATA_NONE;
2181 		}
2182 		break;
2183 	}
2184 
2185 	return xfer;
2186 }
2187 
2188 static int
2189 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
2190 			    struct spdk_nvmf_transport *transport,
2191 			    struct spdk_nvmf_transport_poll_group *group)
2192 {
2193 	struct spdk_nvmf_request		*req = &tcp_req->req;
2194 	struct spdk_nvme_cmd			*cmd;
2195 	struct spdk_nvme_cpl			*rsp;
2196 	struct spdk_nvme_sgl_descriptor		*sgl;
2197 	uint32_t				length;
2198 
2199 	cmd = &req->cmd->nvme_cmd;
2200 	rsp = &req->rsp->nvme_cpl;
2201 	sgl = &cmd->dptr.sgl1;
2202 
2203 	length = sgl->unkeyed.length;
2204 
2205 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2206 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2207 		if (length > transport->opts.max_io_size) {
2208 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2209 				    length, transport->opts.max_io_size);
2210 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2211 			return -1;
2212 		}
2213 
2214 		/* fill request length and populate iovs */
2215 		req->length = length;
2216 
2217 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2218 
2219 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2220 			req->dif.orig_length = length;
2221 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2222 			req->dif.elba_length = length;
2223 		}
2224 
2225 		if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
2226 			/* No available buffers. Queue this request up. */
2227 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2228 				      tcp_req);
2229 			return 0;
2230 		}
2231 
2232 		/* backward compatible */
2233 		req->data = req->iov[0].iov_base;
2234 
2235 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2236 			      tcp_req, req->iovcnt, req->data);
2237 
2238 		return 0;
2239 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2240 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2241 		uint64_t offset = sgl->address;
2242 		uint32_t max_len = transport->opts.in_capsule_data_size;
2243 
2244 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2245 			      offset, length);
2246 
2247 		if (offset > max_len) {
2248 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2249 				    offset, max_len);
2250 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2251 			return -1;
2252 		}
2253 		max_len -= (uint32_t)offset;
2254 
2255 		if (length > max_len) {
2256 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2257 				    length, max_len);
2258 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2259 			return -1;
2260 		}
2261 
2262 		req->data = tcp_req->buf + offset;
2263 		req->data_from_pool = false;
2264 		req->length = length;
2265 
2266 		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
2267 			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
2268 			req->dif.elba_length = length;
2269 		}
2270 
2271 		req->iov[0].iov_base = req->data;
2272 		req->iov[0].iov_len = length;
2273 		req->iovcnt = 1;
2274 
2275 		return 0;
2276 	}
2277 
2278 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2279 		    sgl->generic.type, sgl->generic.subtype);
2280 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2281 	return -1;
2282 }
2283 
2284 static inline enum spdk_nvme_media_error_status_code
2285 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
2286 	enum spdk_nvme_media_error_status_code result;
2287 
2288 	switch (err_type)
2289 	{
2290 	case SPDK_DIF_REFTAG_ERROR:
2291 		result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
2292 		break;
2293 	case SPDK_DIF_APPTAG_ERROR:
2294 		result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
2295 		break;
2296 	case SPDK_DIF_GUARD_ERROR:
2297 		result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
2298 		break;
2299 	default:
2300 		SPDK_UNREACHABLE();
2301 		break;
2302 	}
2303 
2304 	return result;
2305 }
2306 
2307 static void
2308 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2309 			    struct spdk_nvmf_tcp_req *tcp_req)
2310 {
2311 	struct nvme_tcp_pdu *rsp_pdu;
2312 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2313 	uint32_t plen, pdo, alignment;
2314 	int rc;
2315 
2316 	assert(tcp_req == STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req));
2317 
2318 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2319 
2320 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2321 	assert(rsp_pdu != NULL);
2322 
2323 	c2h_data = &rsp_pdu->hdr->c2h_data;
2324 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2325 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2326 
2327 	if (tqpair->host_hdgst_enable) {
2328 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2329 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2330 	}
2331 
2332 	/* set the psh */
2333 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2334 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2335 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2336 	c2h_data->datao = tcp_req->c2h_data_offset;
2337 
2338 	/* set the padding */
2339 	rsp_pdu->padding_len = 0;
2340 	pdo = plen;
2341 	if (tqpair->cpda) {
2342 		alignment = (tqpair->cpda + 1) << 2;
2343 		if (alignment > plen) {
2344 			rsp_pdu->padding_len = alignment - plen;
2345 			pdo = plen = alignment;
2346 		}
2347 	}
2348 
2349 	c2h_data->common.pdo = pdo;
2350 	plen += c2h_data->datal;
2351 	if (tqpair->host_ddgst_enable) {
2352 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2353 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2354 	}
2355 
2356 	c2h_data->common.plen = plen;
2357 
2358 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2359 		rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
2360 	}
2361 
2362 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2363 				  c2h_data->datao, c2h_data->datal);
2364 
2365 	if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2366 		struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
2367 		struct spdk_dif_error err_blk = {};
2368 
2369 		rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
2370 					    0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
2371 		if (rc != 0) {
2372 			SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2373 				    err_blk.err_type, err_blk.err_offset);
2374 			rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
2375 			rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
2376 			STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2377 			spdk_nvmf_tcp_pdu_put(tqpair, rsp_pdu);
2378 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2379 			return;
2380 		}
2381 	}
2382 
2383 	tcp_req->c2h_data_offset += c2h_data->datal;
2384 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2385 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2386 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2387 		if (tqpair->qpair.transport->opts.c2h_success) {
2388 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2389 		}
2390 		STAILQ_REMOVE_HEAD(&tqpair->queued_c2h_data_tcp_req, link);
2391 	}
2392 
2393 	tqpair->c2h_data_pdu_cnt += 1;
2394 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2395 }
2396 
2397 static int
2398 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2399 {
2400 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2401 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2402 }
2403 
2404 static void
2405 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2406 {
2407 	struct spdk_nvmf_tcp_req *tcp_req;
2408 
2409 	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2410 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2411 		tcp_req = STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2412 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2413 	}
2414 }
2415 
2416 static void
2417 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2418 			     struct spdk_nvmf_tcp_qpair *tqpair)
2419 {
2420 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2421 
2422 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2423 
2424 	STAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2425 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2426 }
2427 
2428 static int
2429 request_transfer_out(struct spdk_nvmf_request *req)
2430 {
2431 	struct spdk_nvmf_tcp_req	*tcp_req;
2432 	struct spdk_nvmf_qpair		*qpair;
2433 	struct spdk_nvmf_tcp_qpair	*tqpair;
2434 	struct spdk_nvme_cpl		*rsp;
2435 
2436 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2437 
2438 	qpair = req->qpair;
2439 	rsp = &req->rsp->nvme_cpl;
2440 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2441 
2442 	/* Advance our sq_head pointer */
2443 	if (qpair->sq_head == qpair->sq_head_max) {
2444 		qpair->sq_head = 0;
2445 	} else {
2446 		qpair->sq_head++;
2447 	}
2448 	rsp->sqhd = qpair->sq_head;
2449 
2450 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2451 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2452 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2453 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2454 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2455 	} else {
2456 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2457 	}
2458 
2459 	return 0;
2460 }
2461 
2462 static void
2463 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2464 				   struct spdk_nvmf_tcp_req *tcp_req)
2465 {
2466 	struct nvme_tcp_pdu *pdu;
2467 
2468 	if (tcp_req->req.data_from_pool) {
2469 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2470 		tcp_req->next_expected_r2t_offset = 0;
2471 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2472 	} else {
2473 		pdu = &tqpair->pdu_in_progress;
2474 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2475 			      tqpair);
2476 		/* No need to send r2t, contained in the capsuled data */
2477 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2478 					  0, tcp_req->req.length);
2479 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2480 	}
2481 }
2482 
2483 static void
2484 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2485 				 struct spdk_nvmf_tcp_req *tcp_req)
2486 {
2487 	struct nvme_tcp_pdu *pdu;
2488 	uint32_t plen = 0;
2489 
2490 	pdu = &tqpair->pdu_in_progress;
2491 	plen = pdu->hdr->common.hlen;
2492 
2493 	if (tqpair->host_hdgst_enable) {
2494 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2495 	}
2496 
2497 	if (pdu->hdr->common.plen != plen) {
2498 		tcp_req->has_incapsule_data = true;
2499 	}
2500 }
2501 
2502 static bool
2503 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2504 			  struct spdk_nvmf_tcp_req *tcp_req)
2505 {
2506 	struct spdk_nvmf_tcp_qpair		*tqpair;
2507 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2508 	int					rc;
2509 	enum spdk_nvmf_tcp_req_state		prev_state;
2510 	bool					progress = false;
2511 	struct spdk_nvmf_transport		*transport = &ttransport->transport;
2512 	struct spdk_nvmf_transport_poll_group	*group;
2513 
2514 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2515 	group = &tqpair->group->group;
2516 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2517 
2518 	/* The loop here is to allow for several back-to-back state changes. */
2519 	do {
2520 		prev_state = tcp_req->state;
2521 
2522 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2523 			      tqpair);
2524 
2525 		switch (tcp_req->state) {
2526 		case TCP_REQUEST_STATE_FREE:
2527 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2528 			 * to escape this state. */
2529 			break;
2530 		case TCP_REQUEST_STATE_NEW:
2531 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2532 
2533 			/* copy the cmd from the receive pdu */
2534 			tcp_req->cmd = tqpair->pdu_in_progress.hdr->capsule_cmd.ccsqe;
2535 
2536 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
2537 				tcp_req->req.dif.dif_insert_or_strip = true;
2538 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx;
2539 			}
2540 
2541 			/* The next state transition depends on the data transfer needs of this request. */
2542 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2543 
2544 			/* If no data to transfer, ready to execute. */
2545 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2546 				/* Reset the tqpair receving pdu state */
2547 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2548 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2549 				break;
2550 			}
2551 
2552 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2553 
2554 			if (!tcp_req->has_incapsule_data) {
2555 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2556 			}
2557 
2558 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2559 			STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
2560 			break;
2561 		case TCP_REQUEST_STATE_NEED_BUFFER:
2562 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2563 
2564 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2565 
2566 			if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
2567 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2568 					      "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
2569 					      tcp_req, tqpair);
2570 				/* This request needs to wait in line to obtain a buffer */
2571 				break;
2572 			}
2573 
2574 			/* Try to get a data buffer */
2575 			rc = spdk_nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
2576 			if (rc < 0) {
2577 				STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
2578 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2579 				/* Reset the tqpair receving pdu state */
2580 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2581 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2582 				break;
2583 			}
2584 
2585 			if (!tcp_req->req.data) {
2586 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)",
2587 					      tcp_req, tqpair);
2588 				/* No buffers available. */
2589 				break;
2590 			}
2591 
2592 			STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
2593 
2594 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2595 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2596 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2597 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2598 				break;
2599 			}
2600 
2601 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2602 			break;
2603 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2604 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2605 					  (uintptr_t)tcp_req, 0);
2606 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2607 			 * to escape this state. */
2608 			break;
2609 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2610 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2611 
2612 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2613 				assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
2614 				tcp_req->req.length = tcp_req->req.dif.elba_length;
2615 			}
2616 
2617 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2618 			spdk_nvmf_request_exec(&tcp_req->req);
2619 			break;
2620 		case TCP_REQUEST_STATE_EXECUTING:
2621 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2622 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2623 			 * to escape this state. */
2624 			break;
2625 		case TCP_REQUEST_STATE_EXECUTED:
2626 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2627 
2628 			if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2629 				tcp_req->req.length = tcp_req->req.dif.orig_length;
2630 			}
2631 
2632 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2633 			break;
2634 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2635 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2636 			rc = request_transfer_out(&tcp_req->req);
2637 			assert(rc == 0); /* No good way to handle this currently */
2638 			break;
2639 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2640 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2641 					  (uintptr_t)tcp_req,
2642 					  0);
2643 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2644 			 * to escape this state. */
2645 			break;
2646 		case TCP_REQUEST_STATE_COMPLETED:
2647 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2648 			if (tcp_req->req.data_from_pool) {
2649 				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
2650 			}
2651 			tcp_req->req.length = 0;
2652 			tcp_req->req.iovcnt = 0;
2653 			tcp_req->req.data = NULL;
2654 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2655 			break;
2656 		case TCP_REQUEST_NUM_STATES:
2657 		default:
2658 			assert(0);
2659 			break;
2660 		}
2661 
2662 		if (tcp_req->state != prev_state) {
2663 			progress = true;
2664 		}
2665 	} while (tcp_req->state != prev_state);
2666 
2667 	return progress;
2668 }
2669 
2670 static void
2671 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2672 {
2673 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2674 	int rc;
2675 
2676 	assert(tqpair != NULL);
2677 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2678 
2679 	/* check the following two factors:
2680 	 * rc: The socket is closed
2681 	 * State of tqpair: The tqpair is in EXITING state due to internal error
2682 	 */
2683 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2684 		spdk_nvmf_tcp_qpair_disconnect(tqpair);
2685 	}
2686 }
2687 
2688 static int
2689 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2690 			     struct spdk_nvmf_qpair *qpair)
2691 {
2692 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2693 	struct spdk_nvmf_tcp_qpair	*tqpair;
2694 	int				rc;
2695 
2696 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2697 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2698 
2699 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2700 				      spdk_nvmf_tcp_sock_cb, tqpair);
2701 	if (rc != 0) {
2702 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2703 			    spdk_strerror(errno), errno);
2704 		return -1;
2705 	}
2706 
2707 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2708 	if (rc != 0) {
2709 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2710 		return -1;
2711 	}
2712 
2713 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2714 	if (rc < 0) {
2715 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2716 		return -1;
2717 	}
2718 
2719 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2720 	if (rc < 0) {
2721 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2722 		return -1;
2723 	}
2724 
2725 	tqpair->group = tgroup;
2726 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2727 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2728 
2729 	return 0;
2730 }
2731 
2732 static int
2733 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2734 				struct spdk_nvmf_qpair *qpair)
2735 {
2736 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2737 	struct spdk_nvmf_tcp_qpair		*tqpair;
2738 	int				rc;
2739 
2740 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2741 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2742 
2743 	assert(tqpair->group == tgroup);
2744 
2745 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2746 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2747 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2748 	if (rc != 0) {
2749 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2750 			    spdk_strerror(errno), errno);
2751 	}
2752 
2753 	return rc;
2754 }
2755 
2756 static int
2757 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2758 {
2759 	struct spdk_nvmf_tcp_transport *ttransport;
2760 	struct spdk_nvmf_tcp_req *tcp_req;
2761 
2762 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2763 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2764 
2765 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2766 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2767 
2768 	return 0;
2769 }
2770 
2771 static void
2772 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2773 {
2774 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2775 
2776 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2777 }
2778 
2779 static int
2780 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2781 {
2782 	struct spdk_nvmf_tcp_poll_group *tgroup;
2783 	int rc;
2784 	struct spdk_nvmf_request *req, *req_tmp;
2785 	struct spdk_nvmf_tcp_req *tcp_req;
2786 	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2787 			struct spdk_nvmf_tcp_transport, transport);
2788 
2789 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2790 
2791 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2792 		return 0;
2793 	}
2794 
2795 	STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
2796 		tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2797 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2798 			break;
2799 		}
2800 	}
2801 
2802 	rc = spdk_sock_group_poll(tgroup->sock_group);
2803 	if (rc < 0) {
2804 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2805 	}
2806 
2807 	return rc;
2808 }
2809 
2810 static int
2811 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2812 			     struct spdk_nvme_transport_id *trid, bool peer)
2813 {
2814 	struct spdk_nvmf_tcp_qpair     *tqpair;
2815 	uint16_t			port;
2816 
2817 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2818 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2819 
2820 	if (peer) {
2821 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2822 		port = tqpair->initiator_port;
2823 	} else {
2824 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2825 		port = tqpair->target_port;
2826 	}
2827 
2828 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2829 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2830 	} else if (spdk_sock_is_ipv4(tqpair->sock)) {
2831 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2832 	} else {
2833 		return -1;
2834 	}
2835 
2836 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2837 	return 0;
2838 }
2839 
2840 static int
2841 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2842 				   struct spdk_nvme_transport_id *trid)
2843 {
2844 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2845 }
2846 
2847 static int
2848 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2849 				  struct spdk_nvme_transport_id *trid)
2850 {
2851 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2852 }
2853 
2854 static int
2855 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2856 				    struct spdk_nvme_transport_id *trid)
2857 {
2858 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2859 }
2860 
2861 static int
2862 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2863 {
2864 	struct spdk_nvmf_tcp_qpair     *tqpair;
2865 	int rc;
2866 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2867 
2868 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2869 	if (!rc) {
2870 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2871 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2872 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2873 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2874 			      tqpair->max_queue_depth, tqpair);
2875 	}
2876 
2877 	return rc;
2878 
2879 }
2880 
2881 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2882 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2883 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2884 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2885 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2886 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2887 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2888 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2889 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2890 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2891 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2892 
2893 static void
2894 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2895 {
2896 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2897 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2898 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2899 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2900 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2901 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2902 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2903 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2904 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2905 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2906 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2907 }
2908 
2909 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2910 	.type = SPDK_NVME_TRANSPORT_TCP,
2911 	.opts_init = spdk_nvmf_tcp_opts_init,
2912 	.create = spdk_nvmf_tcp_create,
2913 	.destroy = spdk_nvmf_tcp_destroy,
2914 
2915 	.listen = spdk_nvmf_tcp_listen,
2916 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2917 	.accept = spdk_nvmf_tcp_accept,
2918 
2919 	.listener_discover = spdk_nvmf_tcp_discover,
2920 
2921 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2922 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2923 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2924 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2925 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2926 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2927 
2928 	.req_free = spdk_nvmf_tcp_req_free,
2929 	.req_complete = spdk_nvmf_tcp_req_complete,
2930 
2931 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2932 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2933 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2934 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2935 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2936 };
2937 
2938 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2939