xref: /spdk/lib/nvmf/tcp.c (revision 42dba6047b054d9db99f2a78f1b7100a9b3162a1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/log.h"
50 #include "spdk_internal/nvme_tcp.h"
51 
52 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
53 
54 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
55 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
56 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data pdus for each tqpair */
57 
58 /* This is used to support the Linux kernel NVMe-oF initiator */
59 #define LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H 0
60 
61 /* spdk nvmf related structure */
62 enum spdk_nvmf_tcp_req_state {
63 
64 	/* The request is not currently in use */
65 	TCP_REQUEST_STATE_FREE = 0,
66 
67 	/* Initial state when request first received */
68 	TCP_REQUEST_STATE_NEW,
69 
70 	/* The request is queued until a data buffer is available. */
71 	TCP_REQUEST_STATE_NEED_BUFFER,
72 
73 	/* The request is pending on r2t slots */
74 	TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T,
75 
76 	/* The request is currently transferring data from the host to the controller. */
77 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
78 
79 	/* The request is ready to execute at the block device */
80 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
81 
82 	/* The request is currently executing at the block device */
83 	TCP_REQUEST_STATE_EXECUTING,
84 
85 	/* The request finished executing at the block device */
86 	TCP_REQUEST_STATE_EXECUTED,
87 
88 	/* The request is ready to send a completion */
89 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
90 
91 	/* The request is currently transferring final pdus from the controller to the host. */
92 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
93 
94 	/* The request completed and can be marked free. */
95 	TCP_REQUEST_STATE_COMPLETED,
96 
97 	/* Terminator */
98 	TCP_REQUEST_NUM_STATES,
99 };
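
/*
 * Rough life cycle of a request that carries host-to-controller data
 * (states that do not apply, e.g. the R2T states for in-capsule data,
 * are skipped):
 *   FREE -> NEW -> NEED_BUFFER -> DATA_PENDING_FOR_R2T ->
 *   TRANSFERRING_HOST_TO_CONTROLLER -> READY_TO_EXECUTE -> EXECUTING ->
 *   EXECUTED -> READY_TO_COMPLETE -> TRANSFERRING_CONTROLLER_TO_HOST ->
 *   COMPLETED -> FREE
 */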
100 
101 static const char *spdk_nvme_tcp_term_req_fes_str[] = {
102 	"Invalid PDU Header Field",
103 	"PDU Sequence Error",
104 	"Header Digest Error",
105 	"Data Transfer Out of Range",
106 	"R2T Limit Exceeded",
107 	"Unsupported Parameter",
108 };
109 
110 #define OBJECT_NVMF_TCP_IO				0x80
111 
112 #define TRACE_GROUP_NVMF_TCP				0x5
113 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
114 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
115 #define TRACE_TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
116 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
117 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
118 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
119 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
120 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
121 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
122 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
123 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
124 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
125 #define TRACE_TCP_FLUSH_WRITEBUF_PDU_DONE				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC)
126 
127 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
128 {
129 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
130 	spdk_trace_register_description("TCP_REQ_NEW", "",
131 					TRACE_TCP_REQUEST_STATE_NEW,
132 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
133 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER", "",
134 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
135 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
136 	spdk_trace_register_description("TCP_REQ_TX_PENDING_R2T", "",
137 					TRACE_TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T,
138 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
139 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C", "",
140 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
141 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
142 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE", "",
143 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
144 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
145 	spdk_trace_register_description("TCP_REQ_EXECUTING", "",
146 					TRACE_TCP_REQUEST_STATE_EXECUTING,
147 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
148 	spdk_trace_register_description("TCP_REQ_EXECUTED", "",
149 					TRACE_TCP_REQUEST_STATE_EXECUTED,
150 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
151 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE", "",
152 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
153 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
154 	spdk_trace_register_description("TCP_REQ_TX_C_TO_H", "",
155 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
156 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
157 	spdk_trace_register_description("TCP_REQ_COMPLETED", "",
158 					TRACE_TCP_REQUEST_STATE_COMPLETED,
159 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
160 	spdk_trace_register_description("TCP_FLUSH_WRITEBUF_START", "",
161 					TRACE_TCP_FLUSH_WRITEBUF_START,
162 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
163 	spdk_trace_register_description("TCP_FLUSH_WRITEBUF_DONE", "",
164 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
165 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
166 	spdk_trace_register_description("TCP_FLUSH_WRITEBUF_PDU_DONE", "",
167 					TRACE_TCP_FLUSH_WRITEBUF_PDU_DONE,
168 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
169 }
170 
171 struct nvme_tcp_req  {
172 	struct spdk_nvmf_request		req;
173 	struct spdk_nvme_cpl			rsp;
174 	struct spdk_nvme_cmd			cmd;
175 
176 	/* In-capsule data buffer */
177 	uint8_t					*buf;
178 
179 	bool					data_from_pool;
180 	void					*buffers[SPDK_NVMF_MAX_SGL_ENTRIES];
181 
182 	/* transfer_tag */
183 	uint16_t				ttag;
184 
185 	/*
186 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
187 	 */
188 	uint32_t				next_expected_r2t_offset;
189 	uint32_t				r2tl_remain;
190 
191 	/*
192 	 * c2h_data_offset is used when we send the c2h_data PDU.
193 	 */
194 	uint32_t				c2h_data_offset;
195 	uint32_t				c2h_data_pdu_num;
196 
197 	enum spdk_nvmf_tcp_req_state		state;
198 	bool					has_incapsule_data;
199 
200 	TAILQ_ENTRY(nvme_tcp_req)		link;
201 	TAILQ_ENTRY(nvme_tcp_req)		state_link;
202 };
203 
204 struct nvme_tcp_qpair {
205 	struct spdk_nvmf_qpair			qpair;
206 	struct spdk_nvmf_tcp_port		*port;
207 	struct spdk_sock			*sock;
208 	struct spdk_poller			*flush_poller;
209 
210 	enum nvme_tcp_pdu_recv_state		recv_state;
211 	enum nvme_tcp_qpair_state		state;
212 
213 	struct nvme_tcp_pdu			pdu_in_progress;
214 
215 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
216 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
217 
218 	struct nvme_tcp_pdu			*pdu;
219 	struct nvme_tcp_pdu			*pdu_pool;
220 	uint16_t				free_pdu_num;
221 
222 	/* Queues to track the requests in all states */
223 	TAILQ_HEAD(, nvme_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
224 	/* Number of requests in each state */
225 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
226 
227 	uint32_t				maxr2t;
228 	uint32_t				pending_r2t;
229 	TAILQ_HEAD(, nvme_tcp_req)		queued_c2h_data_tcp_req;
230 
231 	uint8_t					cpda;
232 
233 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
234 	 * buffers to be used for in capsule data.
235 	 */
236 	void					*buf;
237 	void					*bufs;
238 	struct nvme_tcp_req			*req;
239 	struct nvme_tcp_req			*reqs;
240 
241 	bool					host_hdgst_enable;
242 	bool					host_ddgst_enable;
243 
244 
245 	/* The maximum number of I/O outstanding on this connection at one time */
246 	uint16_t				max_queue_depth;
247 
248 
249 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
250 	uint32_t				maxh2cdata;
251 
252 	/* Timer used to destroy qpair after detecting transport error issue if initiator does
253 	 *  not close the connection.
254 	 */
255 	uint64_t				last_pdu_time;
256 	int					timeout;
257 
258 	/* Mgmt channel */
259 	struct spdk_io_channel			*mgmt_channel;
260 	struct spdk_nvmf_tcp_mgmt_channel	*ch;
261 
262 	uint32_t				c2h_data_pdu_cnt;
263 
264 	/* IP address */
265 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
266 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
267 
268 	/* IP port */
269 	uint16_t				initiator_port;
270 	uint16_t				target_port;
271 
272 	TAILQ_ENTRY(nvme_tcp_qpair)		link;
273 };
274 
275 struct spdk_nvmf_tcp_poll_group {
276 	struct spdk_nvmf_transport_poll_group	group;
277 	struct spdk_sock_group			*sock_group;
278 	struct spdk_poller			*timeout_poller;
279 	TAILQ_HEAD(, nvme_tcp_qpair)		qpairs;
280 };
281 
282 struct spdk_nvmf_tcp_port {
283 	struct spdk_nvme_transport_id		trid;
284 	struct spdk_sock			*listen_sock;
285 	uint32_t				ref;
286 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
287 };
288 
289 struct spdk_nvmf_tcp_transport {
290 	struct spdk_nvmf_transport		transport;
291 
292 	pthread_mutex_t				lock;
293 
294 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
295 };
296 
297 struct spdk_nvmf_tcp_mgmt_channel {
298 	/* Requests that are waiting to obtain a data buffer */
299 	TAILQ_HEAD(, nvme_tcp_req)	pending_data_buf_queue;
300 
301 	/* Point to the transport polling group */
302 	struct spdk_nvmf_tcp_poll_group	*tgroup;
303 
304 };
305 
306 static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
307 		struct nvme_tcp_qpair *tqpair);
308 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
309 				      struct nvme_tcp_req *tcp_req);
310 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct nvme_tcp_qpair *tqpair);
311 
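/*
 * Move a request to a new state: remove it from the per-state queue it is
 * currently on, append it to the queue for the new state, and keep the
 * per-state counters in sync.
 */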
312 static void
313 spdk_nvmf_tcp_req_set_state(struct nvme_tcp_req *tcp_req,
314 			    enum spdk_nvmf_tcp_req_state state)
315 {
316 	struct spdk_nvmf_qpair *qpair;
317 	struct nvme_tcp_qpair *tqpair;
318 
319 	qpair = tcp_req->req.qpair;
320 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
321 
322 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
323 	tqpair->state_cntr[tcp_req->state]--;
324 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
325 
326 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
327 	tqpair->state_cntr[state]++;
328 
329 	tcp_req->state = state;
330 }
331 
332 static struct nvme_tcp_pdu *
333 spdk_nvmf_tcp_pdu_get(struct nvme_tcp_qpair *tqpair)
334 {
335 	struct nvme_tcp_pdu *pdu;
336 
337 	pdu = TAILQ_FIRST(&tqpair->free_queue);
338 	if (!pdu) {
339 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
340 		abort();
341 		return NULL;
342 	}
343 
344 	tqpair->free_pdu_num--;
345 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
346 	memset(pdu, 0, sizeof(*pdu));
347 	pdu->ref = 1;
348 	pdu->tqpair = tqpair;
349 
350 	return pdu;
351 }
352 
353 static void
354 spdk_nvmf_tcp_pdu_put(struct nvme_tcp_pdu *pdu)
355 {
356 	struct nvme_tcp_qpair *tqpair;
357 	if (!pdu) {
358 		return;
359 	}
360 
361 	assert(pdu->ref > 0);
362 	assert(pdu->tqpair != NULL);
363 	tqpair = pdu->tqpair;
364 
365 	pdu->ref--;
366 	if (pdu->ref == 0) {
367 		tqpair->free_pdu_num++;
368 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
369 	}
370 }
371 
372 static struct nvme_tcp_req *
373 spdk_nvmf_tcp_req_get(struct nvme_tcp_qpair *tqpair)
374 {
375 	struct nvme_tcp_req *tcp_req;
376 
377 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
378 	if (!tcp_req) {
379 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
380 		return NULL;
381 	}
382 
383 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
384 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
385 	tcp_req->next_expected_r2t_offset = 0;
386 	tcp_req->r2tl_remain = 0;
387 	tcp_req->c2h_data_offset = 0;
388 	tcp_req->has_incapsule_data = false;
389 
390 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
391 	return tcp_req;
392 }
393 
394 static void
395 nvmf_tcp_request_free(struct nvme_tcp_req *tcp_req)
396 {
397 	struct spdk_nvmf_tcp_transport *ttransport;
398 
399 	if (!tcp_req) {
400 		return;
401 	}
402 
403 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
404 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
405 				      struct spdk_nvmf_tcp_transport, transport);
406 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
407 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
408 }
409 
410 static int
411 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
412 {
413 	struct nvme_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct nvme_tcp_req, req);
414 
415 	nvmf_tcp_request_free(tcp_req);
416 
417 	return 0;
418 }
419 
420 static int
421 spdk_nvmf_tcp_mgmt_channel_create(void *io_device, void *ctx_buf)
422 {
423 	struct spdk_nvmf_tcp_mgmt_channel *ch = ctx_buf;
424 
425 	TAILQ_INIT(&ch->pending_data_buf_queue);
426 	return 0;
427 }
428 
429 static void
430 spdk_nvmf_tcp_mgmt_channel_destroy(void *io_device, void *ctx_buf)
431 {
432 	struct spdk_nvmf_tcp_mgmt_channel *ch = ctx_buf;
433 
434 	if (!TAILQ_EMPTY(&ch->pending_data_buf_queue)) {
435 		SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n");
436 	}
437 }
438 
439 static void
440 spdk_nvmf_tcp_drain_state_queue(struct nvme_tcp_qpair *tqpair,
441 				enum spdk_nvmf_tcp_req_state state)
442 {
443 	struct nvme_tcp_req *tcp_req, *req_tmp;
444 
445 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
446 		nvmf_tcp_request_free(tcp_req);
447 	}
448 }
449 
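/*
 * Release everything still queued on the qpair: drop pending send PDUs,
 * drain the queued C2H data list, and free requests in every intermediate
 * state so the qpair can be torn down cleanly.
 */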
450 static void
451 spdk_nvmf_tcp_cleanup_all_states(struct nvme_tcp_qpair *tqpair)
452 {
453 	struct nvme_tcp_req *tcp_req, *req_tmp;
454 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
455 
456 	/* Free the pdus in the send_queue */
457 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
458 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
459 		/* Also check the pdu type so that c2h_data_pdu_cnt stays accurate */
460 		if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
461 			assert(tqpair->c2h_data_pdu_cnt > 0);
462 			tqpair->c2h_data_pdu_cnt--;
463 		}
464 		spdk_nvmf_tcp_pdu_put(pdu);
465 	}
466 
467 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->queued_c2h_data_tcp_req, link, req_tmp) {
468 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
469 	}
470 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
471 
472 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
473 
474 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T);
475 
476 	/* Wipe the requests waiting for buffer from the global list */
477 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
478 			   req_tmp) {
479 		TAILQ_REMOVE(&tqpair->ch->pending_data_buf_queue, tcp_req, link);
480 	}
481 
482 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
483 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
484 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
485 }
486 
487 static void
488 nvmf_tcp_dump_qpair_req_contents(struct nvme_tcp_qpair *tqpair)
489 {
490 	int i;
491 	struct nvme_tcp_req *tcp_req;
492 
493 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
494 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
495 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
496 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
497 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->data_from_pool);
498 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
499 		}
500 	}
501 }
502 
503 static void
504 spdk_nvmf_tcp_qpair_destroy(struct nvme_tcp_qpair *tqpair)
505 {
506 	int err = 0;
507 
508 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
509 
510 	spdk_poller_unregister(&tqpair->flush_poller);
511 	spdk_sock_close(&tqpair->sock);
512 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
513 	if (tqpair->mgmt_channel) {
514 		spdk_put_io_channel(tqpair->mgmt_channel);
515 	}
516 
517 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
518 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
519 			    tqpair->free_pdu_num,
520 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
521 		err++;
522 	}
523 
524 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
525 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
526 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
527 			    tqpair->max_queue_depth);
528 		err++;
529 	}
530 
531 	if (tqpair->c2h_data_pdu_cnt != 0) {
532 		SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair,
533 			    tqpair->c2h_data_pdu_cnt);
534 		err++;
535 	}
536 
537 	if (err > 0) {
538 		nvmf_tcp_dump_qpair_req_contents(tqpair);
539 	}
540 	free(tqpair->pdu);
541 	free(tqpair->pdu_pool);
542 	free(tqpair->req);
543 	free(tqpair->reqs);
544 	spdk_dma_free(tqpair->buf);
545 	spdk_dma_free(tqpair->bufs);
546 	free(tqpair);
547 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
548 }
549 
550 static int
551 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
552 {
553 	struct spdk_nvmf_tcp_transport	*ttransport;
554 
555 	assert(transport != NULL);
556 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
557 
558 	spdk_io_device_unregister(ttransport, NULL);
559 	pthread_mutex_destroy(&ttransport->lock);
560 	free(ttransport);
561 	return 0;
562 }
563 
564 static struct spdk_nvmf_transport *
565 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
566 {
567 	struct spdk_nvmf_tcp_transport *ttransport;
568 	uint32_t sge_count;
569 	uint32_t min_shared_buffers;
570 
571 	ttransport = calloc(1, sizeof(*ttransport));
572 	if (!ttransport) {
573 		return NULL;
574 	}
575 
576 	TAILQ_INIT(&ttransport->ports);
577 
578 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
579 
580 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
581 
582 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
583 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
584 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
585 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
586 		     "  num_shared_buffers=%d\n",
587 		     opts->max_queue_depth,
588 		     opts->max_io_size,
589 		     opts->max_qpairs_per_ctrlr,
590 		     opts->io_unit_size,
591 		     opts->in_capsule_data_size,
592 		     opts->max_aq_depth,
593 		     opts->num_shared_buffers);
594 
595 	/* I/O unit size cannot be larger than max I/O size */
596 	if (opts->io_unit_size > opts->max_io_size) {
597 		opts->io_unit_size = opts->max_io_size;
598 	}
599 
600 	sge_count = opts->max_io_size / opts->io_unit_size;
601 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
602 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
603 		free(ttransport);
604 		return NULL;
605 	}
606 
607 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
608 	if (min_shared_buffers > opts->num_shared_buffers) {
609 		SPDK_ERRLOG("There are not enough buffers to satisfy "
610 			    "per-poll group caches for each thread. (%" PRIu32 ") "
611 			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
612 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
613 		spdk_nvmf_tcp_destroy(&ttransport->transport);
614 		return NULL;
615 	}
616 
617 	pthread_mutex_init(&ttransport->lock, NULL);
618 
619 	spdk_io_device_register(ttransport, spdk_nvmf_tcp_mgmt_channel_create,
620 				spdk_nvmf_tcp_mgmt_channel_destroy,
621 				sizeof(struct spdk_nvmf_tcp_mgmt_channel), "tcp_transport");
622 
623 	return &ttransport->transport;
624 }
625 
626 static int
627 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
628 {
629 	unsigned long long ull;
630 	char *end = NULL;
631 
632 	ull = strtoull(trsvcid, &end, 10);
633 	if (end == NULL || end == trsvcid || *end != '\0') {
634 		return -1;
635 	}
636 
637 	/* Valid TCP/IP port numbers are in [0, 65535] */
638 	if (ull > 65535) {
639 		return -1;
640 	}
641 
642 	return (int)ull;
643 }
644 
645 /**
646  * Canonicalize a listen address trid.
647  */
648 static int
649 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
650 				 const struct spdk_nvme_transport_id *trid)
651 {
652 	int trsvcid_int;
653 
654 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
655 	if (trsvcid_int < 0) {
656 		return -EINVAL;
657 	}
658 
659 	memset(canon_trid, 0, sizeof(*canon_trid));
660 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
661 	canon_trid->adrfam = trid->adrfam;
662 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
663 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
664 
665 	return 0;
666 }
667 
668 /**
669  * Find an existing listening port.
670  *
671  * Caller must hold ttransport->lock.
672  */
673 static struct spdk_nvmf_tcp_port *
674 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
675 			 const struct spdk_nvme_transport_id *trid)
676 {
677 	struct spdk_nvme_transport_id canon_trid;
678 	struct spdk_nvmf_tcp_port *port;
679 
680 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
681 		return NULL;
682 	}
683 
684 	TAILQ_FOREACH(port, &ttransport->ports, link) {
685 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
686 			return port;
687 		}
688 	}
689 
690 	return NULL;
691 }
692 
693 static int
694 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
695 		     const struct spdk_nvme_transport_id *trid)
696 {
697 	struct spdk_nvmf_tcp_transport *ttransport;
698 	struct spdk_nvmf_tcp_port *port;
699 	int trsvcid_int;
700 	uint8_t adrfam;
701 
702 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
703 
704 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
705 	if (trsvcid_int < 0) {
706 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
707 		return -EINVAL;
708 	}
709 
710 	pthread_mutex_lock(&ttransport->lock);
711 
712 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
713 	if (port) {
714 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
715 			      trid->traddr, trid->trsvcid);
716 		port->ref++;
717 		pthread_mutex_unlock(&ttransport->lock);
718 		return 0;
719 	}
720 
721 	port = calloc(1, sizeof(*port));
722 	if (!port) {
723 		SPDK_ERRLOG("Port allocation failed\n");
724 		free(port);
725 		pthread_mutex_unlock(&ttransport->lock);
726 		return -ENOMEM;
727 	}
728 
729 	port->ref = 1;
730 
731 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
732 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
733 			    trid->traddr, trid->trsvcid);
734 		free(port);
735 		pthread_mutex_unlock(&ttransport->lock);
736 		return -ENOMEM;
737 	}
738 
739 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
740 	if (port->listen_sock == NULL) {
741 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
742 			    trid->traddr, trsvcid_int,
743 			    spdk_strerror(errno), errno);
744 		free(port);
745 		pthread_mutex_unlock(&ttransport->lock);
746 		return -errno;
747 	}
748 
749 	if (spdk_sock_is_ipv4(port->listen_sock)) {
750 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
751 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
752 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
753 	} else {
754 		SPDK_ERRLOG("Unhandled socket type\n");
755 		adrfam = 0;
756 	}
757 
758 	if (adrfam != trid->adrfam) {
759 		SPDK_ERRLOG("Socket address family mismatch\n");
760 		spdk_sock_close(&port->listen_sock);
761 		free(port);
762 		pthread_mutex_unlock(&ttransport->lock);
763 		return -EINVAL;
764 	}
765 
766 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
767 		       trid->traddr, trsvcid_int);
768 
769 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
770 	pthread_mutex_unlock(&ttransport->lock);
771 
772 	return 0;
773 }
774 
775 static int
776 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
777 			  const struct spdk_nvme_transport_id *trid)
778 {
779 	struct spdk_nvmf_tcp_transport *ttransport;
780 	struct spdk_nvmf_tcp_port *port;
781 	int rc;
782 
783 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
784 
785 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
786 		      trid->traddr, trid->trsvcid);
787 
788 	pthread_mutex_lock(&ttransport->lock);
789 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
790 	if (port) {
791 		assert(port->ref > 0);
792 		port->ref--;
793 		if (port->ref == 0) {
794 			TAILQ_REMOVE(&ttransport->ports, port, link);
795 			spdk_sock_close(&port->listen_sock);
796 			free(port);
797 		}
798 		rc = 0;
799 	} else {
800 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
801 		rc = -ENOENT;
802 	}
803 	pthread_mutex_unlock(&ttransport->lock);
804 
805 	return rc;
806 }
807 
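/*
 * Flush the head of the qpair's send_queue to the socket: build iovecs for
 * the first few PDUs, skip any portion of the first PDU that was already
 * written, and issue a single writev().  Fully written PDUs have their
 * completion callbacks invoked; a partially written PDU records its
 * writev_offset for the next call.  Returns 0 when nothing is left to send,
 * 1 when data remains queued (or the socket would block), -1 on a fatal
 * socket error.
 */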
808 static int
809 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct nvme_tcp_qpair *tqpair)
810 {
811 	const int array_size = 32;
812 	struct iovec	iovec_array[array_size];
813 	struct iovec	*iov = iovec_array;
814 	int iovec_cnt = 0;
815 	int bytes = 0;
816 	int total_length = 0;
817 	uint32_t writev_offset;
818 	struct nvme_tcp_pdu *pdu;
819 	int pdu_length;
820 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
821 	struct spdk_nvmf_tcp_transport *ttransport;
822 
823 	pdu = TAILQ_FIRST(&tqpair->send_queue);
824 
825 	if (pdu == NULL) {
826 		return 0;
827 	}
828 
829 	/*
830 	 * Build up a list of iovecs for the first few PDUs in the
831 	 *  tqpair's send_queue.
832 	 */
833 	while (pdu != NULL && ((array_size - iovec_cnt) >= 3)) {
834 		iovec_cnt += nvme_tcp_build_iovecs(&iovec_array[iovec_cnt],
835 						   pdu,
836 						   tqpair->host_hdgst_enable,
837 						   tqpair->host_ddgst_enable);
838 		total_length += pdu->hdr.common.plen;
839 		pdu = TAILQ_NEXT(pdu, tailq);
840 	}
841 
842 	/*
843 	 * Check if the first PDU was partially written out the last time
844 	 *  this function was called, and if so adjust the iovec array
845 	 *  accordingly.
846 	 */
847 	writev_offset = TAILQ_FIRST(&tqpair->send_queue)->writev_offset;
848 	total_length -= writev_offset;
849 	while ((writev_offset > 0) && (iovec_cnt > 0)) {
850 		if (writev_offset >= iov->iov_len) {
851 			writev_offset -= iov->iov_len;
852 			iov++;
853 			iovec_cnt--;
854 		} else {
855 			iov->iov_len -= writev_offset;
856 			iov->iov_base = (char *)iov->iov_base + writev_offset;
857 			writev_offset = 0;
858 		}
859 	}
860 
861 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovec_cnt);
862 
863 	bytes = spdk_sock_writev(tqpair->sock, iov, iovec_cnt);
864 	if (bytes == -1) {
865 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
866 			return 1;
867 		} else {
868 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
869 				    errno, spdk_strerror(errno));
870 			return -1;
871 		}
872 	}
873 
874 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
875 
876 	pdu = TAILQ_FIRST(&tqpair->send_queue);
877 
878 	/*
879 	 * Free any PDUs that were fully written.  If a PDU was only
880 	 *  partially written, update its writev_offset so that next
881 	 *  time only the unwritten portion will be sent to writev().
882 	 */
883 	TAILQ_INIT(&completed_pdus_list);
884 	while (bytes > 0) {
885 		pdu_length = pdu->hdr.common.plen - pdu->writev_offset;
886 		if (bytes >= pdu_length) {
887 			bytes -= pdu_length;
888 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
889 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
890 			pdu = TAILQ_FIRST(&tqpair->send_queue);
891 
892 		} else {
893 			pdu->writev_offset += bytes;
894 			bytes = 0;
895 		}
896 	}
897 
898 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
899 		pdu = TAILQ_FIRST(&completed_pdus_list);
900 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
901 		assert(pdu->cb_fn != NULL);
902 		pdu->cb_fn(pdu->cb_arg);
903 		spdk_nvmf_tcp_pdu_put(pdu);
904 	}
905 
906 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
907 	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);
908 
909 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
910 }
911 
912 static int
913 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
914 {
915 	struct nvme_tcp_qpair *tqpair = _tqpair;
916 	int rc;
917 
918 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
919 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
920 		if (rc == 0 && tqpair->flush_poller != NULL) {
921 			spdk_poller_unregister(&tqpair->flush_poller);
922 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
923 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
924 					       tqpair, 50);
925 		}
926 	} else {
927 		/*
928 		 * If the tqpair state is not RUNNING, then
929 		 * keep trying to flush PDUs until our list is
930 		 * empty - to make sure all data is sent before
931 		 * closing the connection.
932 		 */
933 		do {
934 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
935 		} while (rc == 1);
936 	}
937 
938 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
939 		/*
940 		 * If the poller has already started destruction of the tqpair,
941 		 *  i.e. the socket read failed, then the connection state may already
942 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
943 		 */
944 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
945 	}
946 
947 	return -1;
948 }
949 
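/*
 * Queue a PDU for transmission: append header/data digests when the host
 * negotiated them, put the PDU on send_queue, and kick the flush path.
 * cb_fn is invoked once the PDU has been fully written to the socket.
 */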
950 static void
951 spdk_nvmf_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
952 			      struct nvme_tcp_pdu *pdu,
953 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
954 			      void *cb_arg)
955 {
956 	int enable_digest;
957 	int hlen;
958 	uint32_t crc32c;
959 
960 	hlen = pdu->hdr.common.hlen;
961 	enable_digest = 1;
962 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
963 	    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
964 		/* this PDU should be sent without digest */
965 		enable_digest = 0;
966 	}
967 
968 	/* Header Digest */
969 	if (enable_digest && tqpair->host_hdgst_enable) {
970 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
971 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
972 	}
973 
974 	/* Data Digest */
975 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
976 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
977 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
978 	}
979 
980 	pdu->cb_fn = cb_fn;
981 	pdu->cb_arg = cb_arg;
982 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
983 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
984 }
985 
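/*
 * Allocate the per-qpair request objects, in-capsule data buffers and PDU
 * pool.  When sq_head_max is still 0 (the queue size has not been
 * negotiated yet, presumably only the initial CONNECT is outstanding),
 * a single request and a minimal PDU set are allocated; otherwise the full
 * pool of 'size' entries is created.
 */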
986 static int
987 spdk_nvmf_tcp_qpair_init_mem_resource(struct nvme_tcp_qpair *tqpair, uint16_t size)
988 {
989 	int i;
990 	struct nvme_tcp_req *tcp_req;
991 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
992 	struct spdk_nvmf_tcp_transport *ttransport;
993 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
994 
995 	if (!tqpair->qpair.sq_head_max) {
996 		tqpair->req = calloc(1, sizeof(*tqpair->req));
997 		if (!tqpair->req) {
998 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
999 			return -1;
1000 		}
1001 
1002 		if (transport->opts.in_capsule_data_size) {
1003 			tqpair->buf = spdk_dma_zmalloc(ttransport->transport.opts.in_capsule_data_size, 0x1000, NULL);
1004 			if (!tqpair->buf) {
1005 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
1006 				return -1;
1007 			}
1008 		}
1009 
1010 		tcp_req = tqpair->req;
1011 		tcp_req->ttag = 0;
1012 		tcp_req->req.qpair = &tqpair->qpair;
1013 
1014 		/* Set up memory to receive commands */
1015 		if (tqpair->buf) {
1016 			tcp_req->buf = tqpair->buf;
1017 		}
1018 
1019 		/* Set the cmd and rsp */
1020 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1021 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1022 
1023 		/* Initialize request state to FREE */
1024 		tcp_req->state = TCP_REQUEST_STATE_FREE;
1025 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1026 
1027 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
1028 		if (!tqpair->pdu) {
1029 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
1030 			return -1;
1031 		}
1032 
1033 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
1034 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
1035 		}
1036 
1037 	} else {
1038 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
1039 		if (!tqpair->reqs) {
1040 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
1041 			return -1;
1042 		}
1043 
1044 		if (transport->opts.in_capsule_data_size) {
1045 			tqpair->bufs = spdk_dma_zmalloc(size * transport->opts.in_capsule_data_size,
1046 							0x1000, NULL);
1047 			if (!tqpair->bufs) {
1048 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1049 				return -1;
1050 			}
1051 		}
1052 
1053 		for (i = 0; i < size; i++) {
1054 			struct nvme_tcp_req *tcp_req = &tqpair->reqs[i];
1055 
1056 			tcp_req->ttag = i + 1;
1057 			tcp_req->req.qpair = &tqpair->qpair;
1058 
1059 			/* Set up memory to receive commands */
1060 			if (tqpair->bufs) {
1061 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * transport->opts.in_capsule_data_size));
1062 			}
1063 
1064 			/* Set the cmd and rsp */
1065 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1066 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1067 
1068 			/* Initialize request state to FREE */
1069 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1070 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1071 		}
1072 
1073 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1074 		if (!tqpair->pdu_pool) {
1075 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
1076 			return -1;
1077 		}
1078 
1079 		for (i = 0; i < size; i++) {
1080 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1081 		}
1082 	}
1083 
1084 	return 0;
1085 }
1086 
1087 static int
1088 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1089 {
1090 	struct spdk_nvmf_tcp_transport *ttransport;
1091 	struct nvme_tcp_qpair *tqpair;
1092 	int i;
1093 
1094 	ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
1095 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
1096 
1097 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1098 
1099 	TAILQ_INIT(&tqpair->send_queue);
1100 	TAILQ_INIT(&tqpair->free_queue);
1101 	TAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1102 
1103 	/* Initialise request state queues of the qpair */
1104 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1105 		TAILQ_INIT(&tqpair->state_queue[i]);
1106 	}
1107 
1108 	tqpair->host_hdgst_enable = true;
1109 	tqpair->host_ddgst_enable = true;
1110 
1111 	tqpair->mgmt_channel = spdk_get_io_channel(ttransport);
1112 	if (!tqpair->mgmt_channel) {
1113 		return -1;
1114 	}
1115 	tqpair->ch = spdk_io_channel_get_ctx(tqpair->mgmt_channel);
1116 	assert(tqpair->ch != NULL);
1117 
1118 	return 0;
1119 }
1120 
1121 static int
1122 spdk_nvmf_tcp_qpair_sock_init(struct nvme_tcp_qpair *tqpair)
1123 {
1124 
1125 	int rc;
1126 	int buf_size;
1127 
1128 	/* set recv buffer size */
1129 	buf_size = 2 * 1024 * 1024;
1130 	rc = spdk_sock_set_recvbuf(tqpair->sock, buf_size);
1131 	if (rc != 0) {
1132 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
1133 		return rc;
1134 	}
1135 
1136 	/* set send buffer size */
1137 	rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size);
1138 	if (rc != 0) {
1139 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
1140 		return rc;
1141 	}
1142 
1143 	/* set low water mark */
1144 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1145 	if (rc != 0) {
1146 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1147 		return rc;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
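/*
 * Handle a newly accepted socket: wrap it in an nvme_tcp_qpair sized for a
 * single request (the full resources are presumably allocated once the
 * queue depth is known) and hand it to the generic NVMe-oF layer via cb_fn.
 */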
1153 static void
1154 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1155 			      struct spdk_nvmf_tcp_port *port,
1156 			      struct spdk_sock *sock, new_qpair_fn cb_fn)
1157 {
1158 	struct nvme_tcp_qpair *tqpair;
1159 	int rc;
1160 
1161 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1162 		      port->trid.traddr, port->trid.trsvcid);
1163 
1164 	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
1165 	if (tqpair == NULL) {
1166 		SPDK_ERRLOG("Could not allocate new connection.\n");
1167 		spdk_sock_close(&sock);
1168 		return;
1169 	}
1170 
1171 	tqpair->sock = sock;
1172 	tqpair->max_queue_depth = 1;
1173 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1174 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1175 	tqpair->port = port;
1176 	tqpair->qpair.transport = transport;
1177 
1178 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1179 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1180 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1181 			       &tqpair->initiator_port);
1182 	if (rc < 0) {
1183 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1184 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1185 		return;
1186 	}
1187 
1188 	cb_fn(&tqpair->qpair);
1189 }
1190 
1191 static void
1192 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1193 			  new_qpair_fn cb_fn)
1194 {
1195 	struct spdk_sock *sock;
1196 	int i;
1197 
1198 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1199 		sock = spdk_sock_accept(port->listen_sock);
1200 		if (sock) {
1201 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn);
1202 		}
1203 	}
1204 }
1205 
1206 static void
1207 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
1208 {
1209 	struct spdk_nvmf_tcp_transport *ttransport;
1210 	struct spdk_nvmf_tcp_port *port;
1211 
1212 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1213 
1214 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1215 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn);
1216 	}
1217 }
1218 
1219 static void
1220 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1221 		       struct spdk_nvme_transport_id *trid,
1222 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1223 {
1224 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1225 	entry->adrfam = trid->adrfam;
1226 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED;
1227 
1228 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1229 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1230 
1231 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1232 }
1233 
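/*
 * Mark a qpair as EXITING if it has been stuck in the middle of receiving
 * a PDU for longer than the configured timeout.
 */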
1234 static void
1235 spdk_nvmf_tcp_qpair_handle_timeout(struct nvme_tcp_qpair *tqpair, uint64_t tsc)
1236 {
1237 	if ((tqpair->state == NVME_TCP_QPAIR_STATE_EXITING) ||
1238 	    (tqpair->state == NVME_TCP_QPAIR_STATE_EXITED)) {
1239 		return;
1240 	}
1241 
1242 	/* Keep-alive is not supported yet; revisit this check once generic keep-alive support is added */
1243 	if ((tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR) ||
1244 	    (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
1245 		return;
1246 	}
1247 
1248 	/* Check for interval expiration */
1249 	if ((tsc - tqpair->last_pdu_time) > (tqpair->timeout  * spdk_get_ticks_hz())) {
1250 		SPDK_ERRLOG("No pdu received on tqpair=%p for %d seconds\n", tqpair, tqpair->timeout);
1251 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1252 	}
1253 }
1254 
1255 static int
1256 spdk_nvmf_tcp_poll_group_handle_timeout(void *ctx)
1257 {
1258 	struct spdk_nvmf_tcp_poll_group *tgroup = ctx;
1259 	struct nvme_tcp_qpair *tqpair, *tmp;
1260 	uint64_t tsc = spdk_get_ticks();
1261 
1262 	TAILQ_FOREACH_SAFE(tqpair, &tgroup->qpairs, link, tmp) {
1263 		spdk_nvmf_tcp_qpair_handle_timeout(tqpair, tsc);
1264 		if (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING) {
1265 			/* prevent the state from being set again */
1266 			tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
1267 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
1268 			spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
1269 		}
1270 	}
1271 
1272 	return -1;
1273 }
1274 
1275 static struct spdk_nvmf_transport_poll_group *
1276 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1277 {
1278 	struct spdk_nvmf_tcp_poll_group *tgroup;
1279 
1280 	tgroup = calloc(1, sizeof(*tgroup));
1281 	if (!tgroup) {
1282 		return NULL;
1283 	}
1284 
1285 	tgroup->sock_group = spdk_sock_group_create();
1286 	if (!tgroup->sock_group) {
1287 		goto cleanup;
1288 	}
1289 
1290 	TAILQ_INIT(&tgroup->qpairs);
1291 
1292 	tgroup->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_poll_group_handle_timeout, tgroup,
1293 				 1000000);
1294 	return &tgroup->group;
1295 
1296 cleanup:
1297 	free(tgroup);
1298 	return NULL;
1299 }
1300 
1301 static void
1302 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1303 {
1304 	struct spdk_nvmf_tcp_poll_group *tgroup;
1305 
1306 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1307 	spdk_sock_group_close(&tgroup->sock_group);
1308 	spdk_poller_unregister(&tgroup->timeout_poller);
1309 
1310 	free(tgroup);
1311 }
1312 
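/*
 * Advance the PDU receive state machine.  Entering ERROR or
 * AWAIT_PDU_READY discards the PDU currently being assembled.
 */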
1313 static void
1314 spdk_nvmf_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
1315 				   enum nvme_tcp_pdu_recv_state state)
1316 {
1317 	if (tqpair->recv_state == state) {
1318 		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state (%d) to be set\n",
1319 			    tqpair, state);
1320 		return;
1321 	}
1322 
1323 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1324 	tqpair->recv_state = state;
1325 	switch (state) {
1326 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1327 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1328 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1329 		break;
1330 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1331 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1332 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1333 		break;
1334 	default:
1335 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1336 		abort();
1337 		break;
1338 	}
1339 }
1340 
1341 static void
1342 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1343 {
1344 }
1345 
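/*
 * Build and queue a C2H Termination Request PDU for a fatal protocol
 * error.  The header of the offending PDU (capped at
 * SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE bytes) is copied into the
 * payload so the host can diagnose the failure.
 */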
1346 static void
1347 spdk_nvmf_tcp_send_c2h_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1348 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1349 {
1350 	struct nvme_tcp_pdu *rsp_pdu;
1351 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1352 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1353 	uint32_t copy_len;
1354 
1355 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1356 	if (!rsp_pdu) {
1357 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1358 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1359 		return;
1360 	}
1361 
1362 	c2h_term_req = &rsp_pdu->hdr.term_req;
1363 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1364 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1365 
1366 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1367 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1368 		DSET32(&c2h_term_req->fei, error_offset);
1369 	}
1370 
1371 	rsp_pdu->data = (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len;
1372 	copy_len = pdu->hdr.common.hlen;
1373 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1374 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1375 	}
1376 
1377 	/* Copy the error info into the buffer */
1378 	memcpy((uint8_t *)rsp_pdu->data, pdu->hdr.raw, copy_len);
1379 	rsp_pdu->data_len = copy_len;
1380 
1381 	/* Contain the header of the wrong received pdu */
1382 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1383 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1384 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1385 }
1386 
1387 static void
1388 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1389 				     struct nvme_tcp_qpair *tqpair,
1390 				     struct nvme_tcp_pdu *pdu)
1391 {
1392 	struct nvme_tcp_req *tcp_req;
1393 
1394 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1395 	if (!tcp_req) {
1396 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1397 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1398 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1399 		return;
1400 	}
1401 
1402 	pdu->tcp_req = tcp_req;
1403 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1404 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1405 	return;
1406 }
1407 
1408 static void
1409 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1410 		struct nvme_tcp_qpair *tqpair,
1411 		struct nvme_tcp_pdu *pdu)
1412 {
1413 	struct nvme_tcp_req *tcp_req;
1414 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1415 	uint32_t error_offset = 0;
1416 	enum spdk_nvme_tcp_term_req_fes fes;
1417 
1418 	capsule_cmd = &pdu->hdr.capsule_cmd;
1419 	tcp_req = pdu->tcp_req;
1420 	assert(tcp_req != NULL);
1421 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1422 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1423 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1424 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1425 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1426 		goto err;
1427 	}
1428 
1429 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1430 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1431 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1432 
1433 	return;
1434 err:
1435 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1436 }
1437 
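/*
 * Handle the header of an incoming H2C Data PDU: find the matching request
 * by cccid/ttag, validate the data offset and length against the
 * outstanding R2T, and point the PDU at the proper offset within the
 * request's data buffer before the payload is received.
 */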
1438 static void
1439 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1440 				  struct nvme_tcp_qpair *tqpair,
1441 				  struct nvme_tcp_pdu *pdu)
1442 {
1443 	struct nvme_tcp_req *tcp_req;
1444 	uint32_t error_offset = 0;
1445 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1446 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1447 	uint32_t iov_index;
1448 	bool ttag_offset_error = false;
1449 
1450 	h2c_data = &pdu->hdr.h2c_data;
1451 
1452 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1453 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1454 
1455 	/* According to the information in the pdu to find the req */
1456 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1457 		      state_link) {
1458 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1459 			break;
1460 		}
1461 
1462 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1463 			ttag_offset_error = true;
1464 		}
1465 	}
1466 
1467 	if (!tcp_req) {
1468 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1469 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1470 		if (!ttag_offset_error) {
1471 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1472 		} else {
1473 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1474 		}
1475 		goto err;
1476 	}
1477 
1478 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1479 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1480 			      "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset=%u\n",
1481 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1482 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1483 		goto err;
1484 	}
1485 
1486 	if (h2c_data->datal > tqpair->maxh2cdata) {
1487 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datal=%u exceeds maxh2cdata size=%u\n",
1488 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1489 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1490 		goto err;
1491 	}
1492 
1493 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1494 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1495 			      "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1496 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1497 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1498 		goto err;
1499 	}
1500 
1501 	pdu->tcp_req = tcp_req;
1502 	pdu->data_len = h2c_data->datal;
1503 	iov_index = pdu->hdr.h2c_data.datao / ttransport->transport.opts.io_unit_size;
1504 	pdu->data = tcp_req->req.iov[iov_index].iov_base + (pdu->hdr.h2c_data.datao %
1505 			ttransport->transport.opts.io_unit_size);
1506 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1507 	return;
1508 
1509 err:
1510 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1511 }
1512 
1513 static void
1514 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1515 {
1516 	struct nvme_tcp_req *tcp_req = cb_arg;
1517 	nvmf_tcp_request_free(tcp_req);
1518 }
1519 
1520 static void
1521 spdk_nvmf_tcp_send_capsule_resp_pdu(struct nvme_tcp_req *tcp_req,
1522 				    struct nvme_tcp_qpair *tqpair)
1523 {
1524 	struct nvme_tcp_pdu *rsp_pdu;
1525 	struct spdk_nvme_tcp_rsp *capsule_resp;
1526 
1527 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1528 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1529 	if (!rsp_pdu) {
1530 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1531 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1532 		return;
1533 	}
1534 
1535 	capsule_resp = &rsp_pdu->hdr.capsule_resp;
1536 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1537 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1538 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1539 	if (tqpair->host_hdgst_enable) {
1540 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1541 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1542 	}
1543 
1544 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1545 }
1546 
1547 static void
1548 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1549 {
1550 	struct nvme_tcp_req *tcp_req = cb_arg;
1551 	struct nvme_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1552 					struct nvme_tcp_qpair, qpair);
1553 
1554 	assert(tqpair != NULL);
1555 	assert(tcp_req->c2h_data_pdu_num > 0);
1556 	tcp_req->c2h_data_pdu_num--;
1557 	if (!tcp_req->c2h_data_pdu_num) {
1558 #if LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H
1559 		nvmf_tcp_request_free(tcp_req);
1560 #else
1561 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1562 #endif
1563 	}
1564 
1565 	tqpair->c2h_data_pdu_cnt--;
1566 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1567 }
1568 
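/*
 * Send an R2T PDU asking the host for the next chunk of write data,
 * limited to maxh2cdata bytes per transfer.
 */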
1569 static void
1570 spdk_nvmf_tcp_send_r2t_pdu(struct nvme_tcp_qpair *tqpair,
1571 			   struct nvme_tcp_req *tcp_req)
1572 {
1573 	struct nvme_tcp_pdu *rsp_pdu;
1574 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1575 
1576 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1577 	if (!rsp_pdu) {
1578 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1579 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1580 		return;
1581 	}
1582 
1583 	r2t = &rsp_pdu->hdr.r2t;
1584 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1585 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1586 
1587 	if (tqpair->host_hdgst_enable) {
1588 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1589 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1590 	}
1591 
1592 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1593 	r2t->ttag = tcp_req->ttag;
1594 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1595 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1596 	tcp_req->r2tl_remain = r2t->r2tl;
1597 
1598 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1599 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1600 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1601 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1602 }
1603 
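/*
 * Issue R2Ts for requests that are waiting for an R2T slot, up to the
 * qpair's maxr2t limit.
 */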
1604 static void
1605 spdk_nvmf_tcp_handle_queued_r2t_req(struct nvme_tcp_qpair *tqpair)
1606 {
1607 	struct nvme_tcp_req *tcp_req, *req_tmp;
1608 
1609 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T],
1610 			   state_link, req_tmp) {
1611 		if (tqpair->pending_r2t < tqpair->maxr2t) {
1612 			tqpair->pending_r2t++;
1613 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
1614 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1615 		} else {
1616 			break;
1617 		}
1618 	}
1619 }
1620 
1621 static void
1622 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1623 				      struct nvme_tcp_qpair *tqpair,
1624 				      struct nvme_tcp_pdu *pdu)
1625 {
1626 	struct nvme_tcp_req *tcp_req;
1627 
1628 	tcp_req = pdu->tcp_req;
1629 	assert(tcp_req != NULL);
1630 
1631 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1632 
1633 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1634 	tcp_req->r2tl_remain -= pdu->data_len;
1635 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1636 
1637 	if (!tcp_req->r2tl_remain) {
1638 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1639 			assert(tqpair->pending_r2t > 0);
1640 			tqpair->pending_r2t--;
1641 			assert(tqpair->pending_r2t < tqpair->maxr2t);
1642 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1643 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1644 
1645 			spdk_nvmf_tcp_handle_queued_r2t_req(tqpair);
1646 		} else {
1647 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1648 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1649 		}
1650 	}
1651 }
1652 
1653 static void
1654 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1655 {
1656 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1657 		    spdk_nvme_tcp_term_req_fes_str[h2c_term_req->fes]);
1658 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1659 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1660 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1661 			      DGET32(h2c_term_req->fei));
1662 	}
1663 }
1664 
1665 static void
1666 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
1667 				      struct nvme_tcp_pdu *pdu)
1668 {
1669 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1670 	uint32_t error_offset = 0;
1671 	enum spdk_nvme_tcp_term_req_fes fes;
1672 
1673 
1674 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1675 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1676 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1677 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1678 		goto end;
1679 	}
1680 
1681 	/* set the data buffer */
1682 	pdu->data = (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen;
1683 	pdu->data_len = h2c_term_req->common.plen - h2c_term_req->common.hlen;
1684 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1685 	return;
1686 end:
1687 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1688 	return;
1689 }
1690 
1691 static void
1692 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
1693 		struct nvme_tcp_pdu *pdu)
1694 {
1695 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1696 
1697 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1698 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1699 	return;
1700 }
1701 
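/*
 * Called once the full payload of the in-progress PDU has been read: verify the
 * data digest (when the host enabled it) and dispatch the payload to the handler
 * for its PDU type.
 */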
1702 static void
1703 spdk_nvmf_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair)
1704 {
1705 	int rc = 0;
1706 	struct nvme_tcp_pdu *pdu;
1707 	uint32_t crc32c, error_offset = 0;
1708 	enum spdk_nvme_tcp_term_req_fes fes;
1709 	struct spdk_nvmf_tcp_transport *ttransport;
1710 
1711 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1712 	pdu = &tqpair->pdu_in_progress;
1713 
1714 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1715 	/* Check the data digest if needed */
1716 	if (pdu->ddigest_valid_bytes) {
1717 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1718 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1719 		if (rc == 0) {
1720 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1721 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1722 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1723 			return;
1724 
1725 		}
1726 	}
1727 
1728 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1729 	switch (pdu->hdr.common.pdu_type) {
1730 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1731 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1732 		break;
1733 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1734 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1735 		break;
1736 
1737 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1738 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1739 		break;
1740 
1741 	default:
1742 		/* The code should never reach here */
1743 		SPDK_ERRLOG("The code should never reach here\n");
1744 		break;
1745 	}
1746 }
1747 
1748 static void
1749 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1750 {
1751 	struct nvme_tcp_qpair *tqpair = cb_arg;
1752 
1753 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1754 }
1755 
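/*
 * ICReq handling: validate the PDU format version, record the host's maxr2t
 * (converted from its 0-based encoding), its digest preferences and HPDA, then
 * answer with an ICResp whose maxh2cdata is capped at both
 * NVMF_TCP_PDU_MAX_H2C_DATA_SIZE and the transport's io_unit_size.
 */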
1756 static void
1757 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1758 			   struct nvme_tcp_qpair *tqpair,
1759 			   struct nvme_tcp_pdu *pdu)
1760 {
1761 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
1762 	struct nvme_tcp_pdu *rsp_pdu;
1763 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1764 	uint32_t error_offset = 0;
1765 	enum spdk_nvme_tcp_term_req_fes fes;
1766 
1767 	/* Only PFV 0 is defined currently */
1768 	if (ic_req->pfv != 0) {
1769 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1770 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1771 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1772 		goto end;
1773 	}
1774 
1775 	/* MAXR2T is 0-based */
1776 	tqpair->maxr2t = ic_req->maxr2t + 1ull;
1777 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", tqpair->maxr2t);
1778 
1779 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1780 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1781 
1782 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1783 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is: %u\n", tqpair, tqpair->cpda);
1784 
1785 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1786 	if (!rsp_pdu) {
1787 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1788 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1789 		return;
1790 	}
1791 
1792 	ic_resp = &rsp_pdu->hdr.ic_resp;
1793 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1794 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1795 	ic_resp->pfv = 0;
1796 	ic_resp->cpda = tqpair->cpda;
1797 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1798 				      ttransport->transport.opts.io_unit_size);
1799 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1800 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1801 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1802 
1803 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1804 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1805 
1806 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1807 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1808 	return;
1809 end:
1810 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1811 	return;
1812 }
1813 
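/*
 * Called once the PDU-specific header has been read: verify the header digest
 * (when present) and dispatch the header to the handler for its PDU type.
 */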
1814 static void
1815 spdk_nvmf_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair)
1816 {
1817 	struct nvme_tcp_pdu *pdu;
1818 	int rc;
1819 	uint32_t crc32c, error_offset = 0;
1820 	enum spdk_nvme_tcp_term_req_fes fes;
1821 	struct spdk_nvmf_tcp_transport *ttransport;
1822 
1823 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1824 	pdu = &tqpair->pdu_in_progress;
1825 
1826 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1827 		      pdu->hdr.common.pdu_type);
1828 	/* check header digest if needed */
1829 	if (pdu->has_hdgst) {
1830 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1831 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1832 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1833 		if (rc == 0) {
1834 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1835 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1836 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1837 			return;
1838 
1839 		}
1840 	}
1841 
1842 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1843 	switch (pdu->hdr.common.pdu_type) {
1844 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1845 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1846 		break;
1847 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1848 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1849 		break;
1850 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1851 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1852 		break;
1853 
1854 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1855 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1856 		break;
1857 
1858 	default:
1859 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type);
1860 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1861 		error_offset = 1;
1862 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1863 		break;
1864 	}
1865 }
1866 
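/*
 * Validate the common header of the in-progress PDU: the connection must have
 * completed (or, for ICReq, not yet completed) initialization, hlen must match
 * the expected header size for the PDU type, and when CPDA is non-zero the PDO
 * must equal (CPDA + 1) * 4 (e.g. CPDA = 3 requires the data to start at byte
 * 16). Any violation results in a C2H termination request.
 */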
1867 static void
1868 spdk_nvmf_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
1869 {
1870 	struct nvme_tcp_pdu *pdu;
1871 	uint32_t error_offset = 0;
1872 	enum spdk_nvme_tcp_term_req_fes fes;
1873 	uint8_t expected_hlen, pdo;
1874 	bool plen_error = false, pdo_error = false;
1875 
1876 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1877 	pdu = &tqpair->pdu_in_progress;
1878 
1879 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1880 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1881 			SPDK_ERRLOG("Already received an ICReq PDU; rejecting this pdu=%p\n", pdu);
1882 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1883 			goto err;
1884 		}
1885 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1886 		if (pdu->hdr.common.plen != expected_hlen) {
1887 			plen_error = true;
1888 		}
1889 	} else {
1890 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1891 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
1892 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1893 			goto err;
1894 		}
1895 
1896 		switch (pdu->hdr.common.pdu_type) {
1897 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1898 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1899 			pdo = pdu->hdr.common.pdo;
1900 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1901 				pdo_error = true;
1902 				break;
1903 			}
1904 
1905 			if (pdu->hdr.common.plen < expected_hlen) {
1906 				plen_error = true;
1907 			}
1908 			break;
1909 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1910 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1911 			pdo = pdu->hdr.common.pdo;
1912 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1913 				pdo_error = true;
1914 				break;
1915 			}
1916 			if (pdu->hdr.common.plen < expected_hlen) {
1917 				plen_error = true;
1918 			}
1919 			break;
1920 
1921 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1922 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1923 			if ((pdu->hdr.common.plen <= expected_hlen) ||
1924 			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1925 				plen_error = true;
1926 			}
1927 			break;
1928 
1929 		default:
1930 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
1931 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1932 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1933 			goto err;
1934 		}
1935 	}
1936 
1937 	if (pdu->hdr.common.hlen != expected_hlen) {
1938 		SPDK_ERRLOG("PDU type=0x%02x, expected header length %u, got %u on tqpair=%p\n",
1939 			    pdu->hdr.common.pdu_type,
1940 			    expected_hlen, pdu->hdr.common.hlen, tqpair);
1941 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1942 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1943 		goto err;
1944 	} else if (pdo_error) {
1945 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1946 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1947 	} else if (plen_error) {
1948 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1949 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1950 		goto err;
1951 	} else {
1952 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1953 		return;
1954 	}
1955 err:
1956 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1957 }
1958 
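/*
 * Receive-side state machine for one queue pair. Each pass reads as much of the
 * current PDU as the socket will deliver, advancing through
 * AWAIT_PDU_READY -> AWAIT_PDU_CH -> AWAIT_PDU_PSH -> AWAIT_PDU_PAYLOAD, and
 * returns NVME_TCP_PDU_IN_PROGRESS whenever more bytes are needed.
 */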
1959 static int
1960 spdk_nvmf_tcp_sock_process(struct nvme_tcp_qpair *tqpair)
1961 {
1962 	int rc = 0;
1963 	struct nvme_tcp_pdu *pdu;
1964 	enum nvme_tcp_pdu_recv_state prev_state;
1965 	uint32_t data_len;
1966 	uint8_t psh_len, pdo, hlen;
1967 	int8_t  padding_len;
1968 
1969 	/* The loop here is to allow for several back-to-back state changes. */
1970 	do {
1971 		prev_state = tqpair->recv_state;
1972 
1973 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
1974 
1975 		switch (tqpair->recv_state) {
1976 		/* If in a new state */
1977 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1978 			spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1979 			break;
1980 		/* Wait for the common header  */
1981 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1982 			pdu = &tqpair->pdu_in_progress;
1983 			/* common header */
1984 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1985 				rc = nvme_tcp_read_data(tqpair->sock,
1986 							sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1987 							(void *)&pdu->hdr.common + pdu->ch_valid_bytes);
1988 				if (rc < 0) {
1989 					SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
1990 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1991 					break;
1992 				}
1993 				pdu->ch_valid_bytes += rc;
1994 				if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1995 					return NVME_TCP_PDU_IN_PROGRESS;
1996 				}
1997 			}
1998 
1999 			tqpair->last_pdu_time = spdk_get_ticks();
2000 			/* The common header of this PDU has now been read from the socket. */
2001 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
2002 			break;
2003 		/* Wait for the pdu specific header  */
2004 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2005 			pdu = &tqpair->pdu_in_progress;
2006 			psh_len = hlen = pdu->hdr.common.hlen;
2007 			/* Only capsule_cmd and h2c_data PDUs may carry a header digest */
2008 			if (((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD) ||
2009 			     (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_DATA)) &&
2010 			    tqpair->host_hdgst_enable) {
2011 				pdu->has_hdgst = true;
2012 				psh_len += SPDK_NVME_TCP_DIGEST_LEN;
2013 				if (pdu->hdr.common.plen > psh_len) {
2014 					pdo = pdu->hdr.common.pdo;
2015 					padding_len = pdo - psh_len;
2016 					SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "padding length is %d for pdu=%p on tqpair=%p\n", padding_len,
2017 						      pdu, tqpair);
2018 					if (padding_len > 0) {
2019 						psh_len = pdo;
2020 					}
2021 				}
2022 			}
2023 
2024 			psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
2025 			/* The following will read the PSH + header digest (if present) + padding (if present) */
2026 			if (pdu->psh_valid_bytes < psh_len) {
2027 				rc = nvme_tcp_read_data(tqpair->sock,
2028 							psh_len - pdu->psh_valid_bytes,
2029 							(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
2030 				if (rc < 0) {
2031 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2032 					break;
2033 				}
2034 
2035 				pdu->psh_valid_bytes += rc;
2036 				if (pdu->psh_valid_bytes < psh_len) {
2037 					return NVME_TCP_PDU_IN_PROGRESS;
2038 				}
2039 			}
2040 
2041 			/* The entire header (CH, PSH, and header digest) of this PDU has now been read from the socket. */
2042 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
2043 			break;
2044 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2045 			pdu = &tqpair->pdu_in_progress;
2046 
2047 			/* If the payload buffer has not been set up yet, keep waiting. */
2048 			if (!pdu->data) {
2049 				return NVME_TCP_PDU_IN_PROGRESS;
2050 			}
2051 
2052 			data_len = pdu->data_len;
2053 			/* data len */
2054 			if (pdu->data_valid_bytes < data_len) {
2055 				rc = nvme_tcp_read_data(tqpair->sock, data_len - pdu->data_valid_bytes,
2056 							(void *)pdu->data + pdu->data_valid_bytes);
2057 				if (rc < 0) {
2058 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2059 					break;
2060 				}
2061 
2062 				pdu->data_valid_bytes += rc;
2063 				if (pdu->data_valid_bytes < data_len) {
2064 					return NVME_TCP_PDU_IN_PROGRESS;
2065 				}
2066 			}
2067 
2068 			/* data digest */
2069 			if ((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2070 			    tqpair->host_ddgst_enable && (pdu->ddigest_valid_bytes < SPDK_NVME_TCP_DIGEST_LEN)) {
2071 				rc = nvme_tcp_read_data(tqpair->sock,
2072 							SPDK_NVME_TCP_DIGEST_LEN - pdu->ddigest_valid_bytes,
2073 							pdu->data_digest + pdu->ddigest_valid_bytes);
2074 				if (rc < 0) {
2075 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2076 					break;
2077 				}
2078 
2079 				pdu->ddigest_valid_bytes += rc;
2080 				if (pdu->ddigest_valid_bytes < SPDK_NVME_TCP_DIGEST_LEN) {
2081 					return NVME_TCP_PDU_IN_PROGRESS;
2082 				}
2083 			}
2084 
2085 			/* All of this PDU has now been read from the socket. */
2086 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2087 			break;
2088 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2089 			rc = NVME_TCP_PDU_FATAL;
2090 			break;
2091 		default:
2092 			assert(0);
2093 			SPDK_ERRLOG("The code should never reach here\n");
2094 			break;
2095 		}
2096 	} while (tqpair->recv_state != prev_state);
2097 
2098 	return rc;
2099 }
2100 
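/*
 * Derive the data transfer direction of a request from its opcode (or fabrics
 * command type), then downgrade it to SPDK_NVME_DATA_NONE for commands whose
 * SGL describes a zero-length buffer.
 */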
2101 static enum spdk_nvme_data_transfer
2102 spdk_nvmf_tcp_req_get_xfer(struct nvme_tcp_req *tcp_req) {
2103 	enum spdk_nvme_data_transfer xfer;
2104 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2105 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2106 
2107 	/* Figure out data transfer direction */
2108 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2109 	{
2110 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2111 	} else
2112 	{
2113 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2114 
2115 		/* Some admin commands are special cases */
2116 		if ((tcp_req->req.qpair->qid == 0) &&
2117 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2118 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2119 			switch (cmd->cdw10 & 0xff) {
2120 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2121 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2122 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2123 				break;
2124 			default:
2125 				xfer = SPDK_NVME_DATA_NONE;
2126 			}
2127 		}
2128 	}
2129 
2130 	if (xfer == SPDK_NVME_DATA_NONE)
2131 	{
2132 		return xfer;
2133 	}
2134 
2135 	/* Even for commands that may transfer data, they could have specified 0 length.
2136 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2137 	 */
2138 	switch (sgl->generic.type)
2139 	{
2140 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2141 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2142 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2143 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2144 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2145 		if (sgl->unkeyed.length == 0) {
2146 			xfer = SPDK_NVME_DATA_NONE;
2147 		}
2148 		break;
2149 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2150 		if (sgl->keyed.length == 0) {
2151 			xfer = SPDK_NVME_DATA_NONE;
2152 		}
2153 		break;
2154 	}
2155 
2156 	return xfer;
2157 }
2158 
2159 static void
2160 spdk_nvmf_tcp_request_free_buffers(struct nvme_tcp_req *tcp_req,
2161 				   struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport)
2162 {
2163 	for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
2164 		assert(tcp_req->buffers[i] != NULL);
2165 		if (group->buf_cache_count < group->buf_cache_size) {
2166 			STAILQ_INSERT_HEAD(&group->buf_cache,
2167 					   (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link);
2168 			group->buf_cache_count++;
2169 		} else {
2170 			spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]);
2171 		}
2172 		tcp_req->req.iov[i].iov_base = NULL;
2173 		tcp_req->buffers[i] = NULL;
2174 		tcp_req->req.iov[i].iov_len = 0;
2175 	}
2176 	tcp_req->data_from_pool = false;
2177 }
2178 
2179 static int
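/*
 * Populate req.iov with data buffers, preferring the poll group's buffer cache
 * and falling back to the shared transport mempool. Each iov is aligned via
 * NVMF_DATA_BUFFER_MASK and holds at most io_unit_size bytes; on ENOMEM every
 * buffer acquired so far is returned.
 */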
2180 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
2181 			    struct nvme_tcp_req *tcp_req)
2182 {
2183 	void		*buf = NULL;
2184 	uint32_t	length = tcp_req->req.length;
2185 	uint32_t	i = 0;
2186 	struct nvme_tcp_qpair	*tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
2187 	struct spdk_nvmf_transport_poll_group  *group = &tqpair->ch->tgroup->group;
2188 
2189 	tcp_req->req.iovcnt = 0;
2190 	while (length) {
2191 		if (!(STAILQ_EMPTY(&group->buf_cache))) {
2192 			group->buf_cache_count--;
2193 			buf = STAILQ_FIRST(&group->buf_cache);
2194 			STAILQ_REMOVE_HEAD(&group->buf_cache, link);
2195 		} else {
2196 			buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
2197 			if (!buf) {
2198 				goto nomem;
2199 			}
2200 		}
2201 
2202 		tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
2203 							~NVMF_DATA_BUFFER_MASK);
2204 		tcp_req->req.iov[i].iov_len  = spdk_min(length, ttransport->transport.opts.io_unit_size);
2205 		tcp_req->req.iovcnt++;
2206 		tcp_req->buffers[i] = buf;
2207 		length -= tcp_req->req.iov[i].iov_len;
2208 		i++;
2209 	}
2210 
2211 	tcp_req->data_from_pool = true;
2212 	return 0;
2213 
2214 nomem:
2215 	spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2216 	tcp_req->req.iovcnt = 0;
2217 	return -ENOMEM;
2218 }
2219 
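/*
 * Interpret the command's SGL. A transport data block SGL means the payload
 * moves over the wire (R2T/C2H) and buffers are taken from the pool; a data
 * block SGL with the offset subtype points at in-capsule data already held in
 * the request's capsule buffer.
 */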
2220 static int
2221 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_transport *ttransport,
2222 			    struct nvme_tcp_req *tcp_req)
2223 {
2224 	struct spdk_nvme_cmd			*cmd;
2225 	struct spdk_nvme_cpl			*rsp;
2226 	struct spdk_nvme_sgl_descriptor		*sgl;
2227 
2228 	cmd = &tcp_req->req.cmd->nvme_cmd;
2229 	rsp = &tcp_req->req.rsp->nvme_cpl;
2230 	sgl = &cmd->dptr.sgl1;
2231 
2232 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2233 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2234 		if (sgl->unkeyed.length > ttransport->transport.opts.max_io_size) {
2235 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2236 				    sgl->unkeyed.length, ttransport->transport.opts.max_io_size);
2237 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2238 			return -1;
2239 		}
2240 
2241 		/* fill request length and populate iovs */
2242 		tcp_req->req.length = sgl->unkeyed.length;
2243 
2244 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n",
2245 			      sgl->unkeyed.length);
2246 
2247 		if (spdk_nvmf_tcp_req_fill_iovs(ttransport, tcp_req) < 0) {
2248 			/* No available buffers. Queue this request up. */
2249 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n", tcp_req);
2250 			return 0;
2251 		}
2252 
2253 		/* Backward compatibility: req.data points at the first iov */
2254 		tcp_req->req.data = tcp_req->req.iov[0].iov_base;
2255 
2256 
2257 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %u buffer(s) from the central pool, data=%p\n",
2258 			      tcp_req,
2259 			      tcp_req->req.iovcnt, tcp_req->req.data);
2260 
2261 		return 0;
2262 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2263 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2264 		uint64_t offset = sgl->address;
2265 		uint32_t max_len = ttransport->transport.opts.in_capsule_data_size;
2266 
2267 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2268 			      offset, sgl->unkeyed.length);
2269 
2270 		if (offset > max_len) {
2271 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2272 				    offset, max_len);
2273 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2274 			return -1;
2275 		}
2276 		max_len -= (uint32_t)offset;
2277 
2278 		if (sgl->unkeyed.length > max_len) {
2279 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2280 				    sgl->unkeyed.length, max_len);
2281 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2282 			return -1;
2283 		}
2284 
2285 		tcp_req->req.data = tcp_req->buf + offset;
2286 		tcp_req->data_from_pool = false;
2287 		tcp_req->req.length = sgl->unkeyed.length;
2288 
2289 		tcp_req->req.iov[0].iov_base = tcp_req->req.data;
2290 		tcp_req->req.iov[0].iov_len = tcp_req->req.length;
2291 		tcp_req->req.iovcnt = 1;
2292 
2293 		return 0;
2294 	}
2295 
2296 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2297 		    sgl->generic.type, sgl->generic.subtype);
2298 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2299 	return -1;
2300 }
2301 
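/*
 * Emit one C2H_DATA PDU carrying the next chunk of read data. The chunk is
 * limited to NVMF_TCP_PDU_MAX_C2H_DATA_SIZE and never crosses an iov boundary;
 * padding is inserted when the host requested a CPDA alignment, and the
 * LAST_PDU flag is set on the final chunk of the request.
 */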
2302 static void
2303 spdk_nvmf_tcp_send_c2h_data(struct nvme_tcp_qpair *tqpair,
2304 			    struct nvme_tcp_req *tcp_req)
2305 {
2306 	struct nvme_tcp_pdu *rsp_pdu;
2307 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2308 	uint32_t plen, pdo, alignment, offset, iov_index;
2309 
2310 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2311 
2312 	/* Every iov except possibly the last holds io_unit_size bytes, so the first iov_len is a valid stride */
2313 	iov_index = tcp_req->c2h_data_offset / tcp_req->req.iov[0].iov_len;
2314 	offset = tcp_req->c2h_data_offset % tcp_req->req.iov[0].iov_len;
2315 
2316 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2317 	assert(rsp_pdu != NULL);
2318 
2319 	c2h_data = &rsp_pdu->hdr.c2h_data;
2320 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2321 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2322 
2323 	if (tqpair->host_hdgst_enable) {
2324 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2325 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2326 	}
2327 
2328 	/* set the psh */
2329 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2330 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2331 				   (tcp_req->req.iov[iov_index].iov_len - offset));
2332 	c2h_data->datao = tcp_req->c2h_data_offset;
2333 
2334 	/* set the padding */
2335 	rsp_pdu->padding_len = 0;
2336 	pdo = plen;
2337 	if (tqpair->cpda) {
2338 		alignment = (tqpair->cpda + 1) << 2;
2339 		if (alignment > plen) {
2340 			rsp_pdu->padding_len = alignment - plen;
2341 			pdo = plen = alignment;
2342 		}
2343 	}
2344 
2345 	c2h_data->common.pdo = pdo;
2346 	plen += c2h_data->datal;
2347 	if (tqpair->host_ddgst_enable) {
2348 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2349 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2350 	}
2351 
2352 	c2h_data->common.plen = plen;
2353 
2354 	rsp_pdu->data = tcp_req->req.iov[iov_index].iov_base + offset;
2355 	rsp_pdu->data_len = c2h_data->datal;
2356 
2357 	tcp_req->c2h_data_offset += c2h_data->datal;
2358 	if (iov_index == (tcp_req->req.iovcnt - 1) && (tcp_req->c2h_data_offset == tcp_req->req.length)) {
2359 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2360 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2361 		/* The Linux kernel initiator does not support the C2H SUCCESS flag yet */
2362 #if LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H
2363 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2364 #endif
2365 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2366 	}
2367 
2368 	tqpair->c2h_data_pdu_cnt += 1;
2369 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2370 }
2371 
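/*
 * Count how many C2H_DATA PDUs the queued read will need: each iov is split
 * into NVMF_TCP_PDU_MAX_C2H_DATA_SIZE chunks, rounded up. For example, with the
 * default 128 KiB io_unit_size a 256 KiB read spans two iovs and therefore
 * needs two C2H_DATA PDUs.
 */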
2372 static int
2373 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct nvme_tcp_req *tcp_req)
2374 {
2375 	uint32_t i, iov_cnt, pdu_num = 0;
2376 
2377 	iov_cnt = tcp_req->req.iovcnt;
2378 	for (i = 0; i < iov_cnt; i++) {
2379 		pdu_num += (tcp_req->req.iov[i].iov_len + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2380 			   NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2381 	}
2382 
2383 	return pdu_num;
2384 }
2385 
2386 static void
2387 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct nvme_tcp_qpair *tqpair)
2388 {
2389 	struct nvme_tcp_req *tcp_req;
2390 
2391 	while (!TAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2392 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2393 		tcp_req = TAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2394 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2395 	}
2396 }
2397 
2398 static void
2399 spdk_nvmf_tcp_queue_c2h_data(struct nvme_tcp_req *tcp_req,
2400 			     struct nvme_tcp_qpair *tqpair)
2401 {
2402 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2403 
2404 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2405 
2406 	TAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2407 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2408 }
2409 
2410 static int
2411 request_transfer_out(struct spdk_nvmf_request *req)
2412 {
2413 	struct nvme_tcp_req	*tcp_req;
2414 	struct spdk_nvmf_qpair		*qpair;
2415 	struct nvme_tcp_qpair	*tqpair;
2416 	struct spdk_nvme_cpl		*rsp;
2417 
2418 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2419 
2420 	qpair = req->qpair;
2421 	rsp = &req->rsp->nvme_cpl;
2422 	tcp_req = SPDK_CONTAINEROF(req, struct nvme_tcp_req, req);
2423 
2424 	/* Advance our sq_head pointer */
2425 	if (qpair->sq_head == qpair->sq_head_max) {
2426 		qpair->sq_head = 0;
2427 	} else {
2428 		qpair->sq_head++;
2429 	}
2430 	rsp->sqhd = qpair->sq_head;
2431 
2432 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
2433 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2434 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2435 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2436 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2437 	} else {
2438 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2439 	}
2440 
2441 	return 0;
2442 }
2443 
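/*
 * Decide how the write payload reaches the request buffer: data pulled from the
 * pool is fetched from the host via R2T, while in-capsule data is read straight
 * out of the capsule PDU that is already in progress.
 */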
2444 static void
2445 spdk_nvmf_tcp_pdu_set_buf_from_req(struct nvme_tcp_qpair *tqpair,
2446 				   struct nvme_tcp_req *tcp_req)
2447 {
2448 	struct nvme_tcp_pdu *pdu;
2449 
2450 	if (tcp_req->data_from_pool) {
2451 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2452 		tcp_req->next_expected_r2t_offset = 0;
2453 		spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T);
2454 		spdk_nvmf_tcp_handle_queued_r2t_req(tqpair);
2455 	} else {
2456 		pdu = &tqpair->pdu_in_progress;
2457 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2458 			      tqpair);
2459 		/* No need to send an R2T; the data is contained in the capsule */
2460 		pdu->data = tcp_req->req.data;
2461 		pdu->data_len = tcp_req->req.length;
2462 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2463 		spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2464 	}
2465 }
2466 
2467 static void
2468 spdk_nvmf_tcp_set_incapsule_data(struct nvme_tcp_qpair *tqpair,
2469 				 struct nvme_tcp_req *tcp_req)
2470 {
2471 	struct nvme_tcp_pdu *pdu;
2472 	uint32_t plen = 0;
2473 
2474 	pdu = &tqpair->pdu_in_progress;
2475 	plen = pdu->hdr.common.hlen;
2476 
2477 	if (tqpair->host_hdgst_enable) {
2478 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2479 	}
2480 
2481 	if (pdu->hdr.common.plen != plen) {
2482 		tcp_req->has_incapsule_data = true;
2483 	}
2484 }
2485 
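/*
 * Per-request state machine. Loops as long as the request keeps changing state,
 * so several back-to-back transitions (e.g. NEW -> NEED_BUFFER ->
 * READY_TO_EXECUTE) can be handled in a single call; returns true if any
 * progress was made.
 */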
2486 static bool
2487 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2488 			  struct nvme_tcp_req *tcp_req)
2489 {
2490 	struct nvme_tcp_qpair	*tqpair;
2491 	struct spdk_nvme_cpl		*rsp = &tcp_req->req.rsp->nvme_cpl;
2492 	int				rc;
2493 	enum spdk_nvmf_tcp_req_state prev_state;
2494 	bool				progress = false;
2495 	struct spdk_nvmf_transport_poll_group *group;
2496 
2497 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
2498 	group = &tqpair->ch->tgroup->group;
2499 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2500 
2501 	/* The loop here is to allow for several back-to-back state changes. */
2502 	do {
2503 		prev_state = tcp_req->state;
2504 
2505 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2506 			      tqpair);
2507 
2508 		switch (tcp_req->state) {
2509 		case TCP_REQUEST_STATE_FREE:
2510 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2511 			 * to escape this state. */
2512 			break;
2513 		case TCP_REQUEST_STATE_NEW:
2514 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2515 
2516 			/* copy the cmd from the receive pdu */
2517 			tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe;
2518 
2519 			/* The next state transition depends on the data transfer needs of this request. */
2520 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2521 
2522 			/* If no data to transfer, ready to execute. */
2523 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2524 				/* Reset the tqpair receiving pdu state */
2525 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2526 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2527 				break;
2528 			}
2529 
2530 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2531 
2532 			if (!tcp_req->has_incapsule_data) {
2533 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2534 			}
2535 
2536 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2537 			TAILQ_INSERT_TAIL(&tqpair->ch->pending_data_buf_queue, tcp_req, link);
2538 			break;
2539 		case TCP_REQUEST_STATE_NEED_BUFFER:
2540 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2541 
2542 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2543 
2544 			if (!tcp_req->has_incapsule_data && (tcp_req != TAILQ_FIRST(&tqpair->ch->pending_data_buf_queue))) {
2545 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2546 					      "tcp_req(%p) is not at the head of the pending buffer queue on tqpair=%p\n",
2547 					      tcp_req, tqpair);
2548 				/* This request needs to wait in line to obtain a buffer */
2549 				break;
2550 			}
2551 
2552 			/* Try to get a data buffer */
2553 			rc = spdk_nvmf_tcp_req_parse_sgl(ttransport, tcp_req);
2554 			if (rc < 0) {
2555 				TAILQ_REMOVE(&tqpair->ch->pending_data_buf_queue, tcp_req, link);
2556 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2557 				/* Reset the tqpair receiving pdu state */
2558 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2559 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2560 				break;
2561 			}
2562 
2563 			if (!tcp_req->req.data) {
2564 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2565 					      tcp_req, tqpair);
2566 				/* No buffers available. */
2567 				break;
2568 			}
2569 
2570 			TAILQ_REMOVE(&tqpair->ch->pending_data_buf_queue, tcp_req, link);
2571 
2572 			/* If data is transferring from host to controller, we need to do a transfer from the host. */
2573 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2574 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2575 				break;
2576 			}
2577 
2578 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2579 			break;
2580 		case TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T:
2581 			spdk_trace_record(TCP_REQUEST_STATE_DATA_PENDING_FOR_R2T, 0, 0,
2582 					  (uintptr_t)tcp_req, 0);
2583 			/* Some external code must kick a request into
2584 			 * TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER to escape this state. */
2585 			break;
2586 
2587 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2588 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2589 					  (uintptr_t)tcp_req, 0);
2590 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2591 			 * to escape this state. */
2592 			break;
2593 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2594 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2595 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2596 			spdk_nvmf_request_exec(&tcp_req->req);
2597 			break;
2598 		case TCP_REQUEST_STATE_EXECUTING:
2599 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2600 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2601 			 * to escape this state. */
2602 			break;
2603 		case TCP_REQUEST_STATE_EXECUTED:
2604 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2605 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2606 			break;
2607 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2608 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2609 			rc = request_transfer_out(&tcp_req->req);
2610 			assert(rc == 0); /* No good way to handle this currently */
2611 			break;
2612 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2613 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2614 					  (uintptr_t)tcp_req,
2615 					  0);
2616 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2617 			 * to escape this state. */
2618 			break;
2619 		case TCP_REQUEST_STATE_COMPLETED:
2620 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2621 			if (tcp_req->data_from_pool) {
2622 				spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2623 			}
2624 			tcp_req->req.length = 0;
2625 			tcp_req->req.iovcnt = 0;
2626 			tcp_req->req.data = NULL;
2627 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2628 			break;
2629 		case TCP_REQUEST_NUM_STATES:
2630 		default:
2631 			assert(0);
2632 			break;
2633 		}
2634 
2635 		if (tcp_req->state != prev_state) {
2636 			progress = true;
2637 		}
2638 	} while (tcp_req->state != prev_state);
2639 
2640 	return progress;
2641 }
2642 
2643 static void
2644 spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
2645 				    struct nvme_tcp_qpair *tqpair)
2646 {
2647 	struct nvme_tcp_req *tcp_req, *req_tmp;
2648 
2649 	spdk_nvmf_tcp_handle_queued_r2t_req(tqpair);
2650 
2651 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->ch->pending_data_buf_queue, link, req_tmp) {
2652 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2653 			break;
2654 		}
2655 	}
2656 }
2657 
2658 static void
2659 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2660 {
2661 	struct nvme_tcp_qpair *tqpair = arg;
2662 	struct spdk_nvmf_tcp_transport *ttransport;
2663 	int rc;
2664 
2665 	assert(tqpair != NULL);
2666 
2667 	if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR) {
2668 		return;
2669 	}
2670 
2671 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
2672 	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);
2673 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2674 	if (rc < 0) {
2675 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2676 		spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
2677 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
2678 		spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
2679 	}
2680 }
2681 
2682 static int
2683 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2684 			     struct spdk_nvmf_qpair *qpair)
2685 {
2686 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2687 	struct nvme_tcp_qpair	*tqpair;
2688 	int				rc;
2689 
2690 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2691 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
2692 
2693 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2694 				      spdk_nvmf_tcp_sock_cb, tqpair);
2695 	if (rc != 0) {
2696 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2697 			    spdk_strerror(errno), errno);
2698 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2699 		return -1;
2700 	}
2701 
2702 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2703 	if (rc != 0) {
2704 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2705 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2706 		return -1;
2707 	}
2708 
2709 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2710 	if (rc < 0) {
2711 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2712 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2713 		return -1;
2714 	}
2715 
2716 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2717 	if (rc < 0) {
2718 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2719 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2720 		return -1;
2721 	}
2722 
2723 	tqpair->ch->tgroup = tgroup;
2724 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2725 	tqpair->timeout = SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT;
2726 	tqpair->last_pdu_time = spdk_get_ticks();
2727 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2728 
2729 	return 0;
2730 }
2731 
2732 static int
2733 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2734 				struct spdk_nvmf_qpair *qpair)
2735 {
2736 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2737 	struct nvme_tcp_qpair		*tqpair;
2738 	int				rc;
2739 
2740 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2741 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
2742 
2743 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2744 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2745 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2746 	if (rc != 0) {
2747 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2748 			    spdk_strerror(errno), errno);
2749 	}
2750 
2751 	return rc;
2752 }
2753 
2754 static int
2755 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2756 {
2757 	struct spdk_nvmf_tcp_transport *ttransport;
2758 	struct nvme_tcp_req *tcp_req;
2759 
2760 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2761 	tcp_req = SPDK_CONTAINEROF(req, struct nvme_tcp_req, req);
2762 
2763 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2764 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2765 
2766 	return 0;
2767 }
2768 
2769 static void
2770 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2771 {
2772 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2773 
2774 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair));
2775 }
2776 
2777 static int
2778 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2779 {
2780 	struct spdk_nvmf_tcp_poll_group *tgroup;
2781 	int rc;
2782 
2783 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2784 
2785 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2786 		return 0;
2787 	}
2788 
2789 	rc = spdk_sock_group_poll(tgroup->sock_group);
2790 	if (rc < 0) {
2791 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2792 		return rc;
2793 	}
2794 
2795 	return 0;
2796 }
2797 
2798 static bool
2799 spdk_nvmf_tcp_qpair_is_idle(struct spdk_nvmf_qpair *qpair)
2800 {
2801 	struct nvme_tcp_qpair *tqpair;
2802 
2803 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
2804 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] == tqpair->max_queue_depth) {
2805 		return true;
2806 	}
2807 
2808 	return false;
2809 }
2810 
2811 static int
2812 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2813 			     struct spdk_nvme_transport_id *trid, bool peer)
2814 {
2815 	struct nvme_tcp_qpair     *tqpair;
2816 	uint16_t			port;
2817 
2818 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
2819 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2820 
2821 	if (peer) {
2822 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2823 		port = tqpair->initiator_port;
2824 	} else {
2825 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2826 		port = tqpair->target_port;
2827 	}
2828 
2829 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2830 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2831 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2832 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2833 	} else {
2834 		return -1;
2835 	}
2836 
2837 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2838 	return 0;
2839 }
2840 
2841 static int
2842 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2843 				   struct spdk_nvme_transport_id *trid)
2844 {
2845 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2846 }
2847 
2848 static int
2849 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2850 				  struct spdk_nvme_transport_id *trid)
2851 {
2852 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2853 }
2854 
2855 static int
2856 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2857 				    struct spdk_nvme_transport_id *trid)
2858 {
2859 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2860 }
2861 
2862 static int
2863 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2864 {
2865 	struct nvme_tcp_qpair     *tqpair;
2866 	int rc;
2867 	tqpair = SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
2868 
2869 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2870 	if (!rc) {
2871 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2872 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2873 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2874 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2875 			      tqpair->max_queue_depth, tqpair);
2876 	}
2877 
2878 	return rc;
2879 
2880 }
2881 
2882 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2883 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2884 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
2885 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2886 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2887 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2888 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 512
2889 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2890 
2891 static void
2892 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2893 {
2894 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2895 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2896 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2897 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2898 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2899 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2900 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2901 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2902 }
2903 
2904 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2905 	.type = SPDK_NVME_TRANSPORT_TCP,
2906 	.opts_init = spdk_nvmf_tcp_opts_init,
2907 	.create = spdk_nvmf_tcp_create,
2908 	.destroy = spdk_nvmf_tcp_destroy,
2909 
2910 	.listen = spdk_nvmf_tcp_listen,
2911 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2912 	.accept = spdk_nvmf_tcp_accept,
2913 
2914 	.listener_discover = spdk_nvmf_tcp_discover,
2915 
2916 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2917 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2918 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2919 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2920 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2921 
2922 	.req_free = spdk_nvmf_tcp_req_free,
2923 	.req_complete = spdk_nvmf_tcp_req_complete,
2924 
2925 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2926 	.qpair_is_idle = spdk_nvmf_tcp_qpair_is_idle,
2927 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2928 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2929 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2930 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2931 };
2932 
2933 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2934