xref: /spdk/lib/nvmf/tcp.c (revision a15dcb0bf07debe26957eaf30ec392942910ea99)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/crc32.h"
36 #include "spdk/endian.h"
37 #include "spdk/assert.h"
38 #include "spdk/thread.h"
39 #include "spdk/nvmf.h"
40 #include "spdk/nvmf_spec.h"
41 #include "spdk/sock.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/util.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 #include "spdk_internal/log.h"
50 #include "spdk_internal/nvme_tcp.h"
51 
52 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
53 
54 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
55 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
56 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximum number of c2h_data pdus for each tqpair */
57 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
58 
59 /* spdk nvmf related structure */
60 enum spdk_nvmf_tcp_req_state {
61 
62 	/* The request is not currently in use */
63 	TCP_REQUEST_STATE_FREE = 0,
64 
65 	/* Initial state when request first received */
66 	TCP_REQUEST_STATE_NEW,
67 
68 	/* The request is queued until a data buffer is available. */
69 	TCP_REQUEST_STATE_NEED_BUFFER,
70 
71 	/* The request is currently transferring data from the host to the controller. */
72 	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
73 
74 	/* The request is ready to execute at the block device */
75 	TCP_REQUEST_STATE_READY_TO_EXECUTE,
76 
77 	/* The request is currently executing at the block device */
78 	TCP_REQUEST_STATE_EXECUTING,
79 
80 	/* The request finished executing at the block device */
81 	TCP_REQUEST_STATE_EXECUTED,
82 
83 	/* The request is ready to send a completion */
84 	TCP_REQUEST_STATE_READY_TO_COMPLETE,
85 
86 	/* The request is currently transferring final pdus from the controller to the host. */
87 	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
88 
89 	/* The request completed and can be marked free. */
90 	TCP_REQUEST_STATE_COMPLETED,
91 
92 	/* Terminator */
93 	TCP_REQUEST_NUM_STATES,
94 };
95 
96 static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
97 	"Invalid PDU Header Field",
98 	"PDU Sequence Error",
99 	"Header Digest Error",
100 	"Data Transfer Out of Range",
101 	"R2T Limit Exceeded",
102 	"Unsupported Parameter",
103 };
104 
105 #define OBJECT_NVMF_TCP_IO				0x80
106 
107 #define TRACE_GROUP_NVMF_TCP				0x5
108 #define TRACE_TCP_REQUEST_STATE_NEW					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
109 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
110 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
111 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
112 #define TRACE_TCP_REQUEST_STATE_EXECUTING				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
113 #define TRACE_TCP_REQUEST_STATE_EXECUTED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
114 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE			SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
115 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST		SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
116 #define TRACE_TCP_REQUEST_STATE_COMPLETED				SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
117 #define TRACE_TCP_FLUSH_WRITEBUF_START					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
118 #define TRACE_TCP_FLUSH_WRITEBUF_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
119 #define TRACE_TCP_READ_FROM_SOCKET_DONE					SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
120 
121 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
122 {
123 	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
124 	spdk_trace_register_description("TCP_REQ_NEW",
125 					TRACE_TCP_REQUEST_STATE_NEW,
126 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
127 	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
128 					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
129 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
130 	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
131 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
132 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
133 	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
134 					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
135 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
136 	spdk_trace_register_description("TCP_REQ_EXECUTING",
137 					TRACE_TCP_REQUEST_STATE_EXECUTING,
138 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
139 	spdk_trace_register_description("TCP_REQ_EXECUTED",
140 					TRACE_TCP_REQUEST_STATE_EXECUTED,
141 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
142 	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
143 					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
144 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
145 	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
146 					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
147 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
148 	spdk_trace_register_description("TCP_REQ_COMPLETED",
149 					TRACE_TCP_REQUEST_STATE_COMPLETED,
150 					OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
151 	spdk_trace_register_description("TCP_WRITE_START",
152 					TRACE_TCP_FLUSH_WRITEBUF_START,
153 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
154 	spdk_trace_register_description("TCP_WRITE_DONE",
155 					TRACE_TCP_FLUSH_WRITEBUF_DONE,
156 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
157 	spdk_trace_register_description("TCP_READ_DONE",
158 					TRACE_TCP_READ_FROM_SOCKET_DONE,
159 					OWNER_NONE, OBJECT_NONE, 0, 0, "");
160 }
161 
162 struct spdk_nvmf_tcp_req  {
163 	struct spdk_nvmf_request		req;
164 	struct spdk_nvme_cpl			rsp;
165 	struct spdk_nvme_cmd			cmd;
166 
167 	/* In-capsule data buffer */
168 	uint8_t					*buf;
169 
170 	bool					data_from_pool;
171 	bool					has_incapsule_data;
172 
173 	/* transfer_tag */
174 	uint16_t				ttag;
175 
176 	enum spdk_nvmf_tcp_req_state		state;
177 
178 	void					*buffers[SPDK_NVMF_MAX_SGL_ENTRIES];
179 
180 	/*
181 	 * next_expected_r2t_offset is used when we receive the h2c_data PDU.
182 	 */
183 	uint32_t				next_expected_r2t_offset;
184 	uint32_t				r2tl_remain;
185 
186 	/*
187 	 * c2h_data_offset is used when we send the c2h_data PDU.
188 	 */
189 	uint32_t				c2h_data_offset;
190 	uint32_t				c2h_data_pdu_num;
191 
192 	struct spdk_dif_ctx			dif_ctx;
193 	bool					dif_insert_or_strip;
194 	uint32_t				elba_length;
195 	uint32_t				orig_length;
196 
197 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		link;
198 	TAILQ_ENTRY(spdk_nvmf_tcp_req)		state_link;
199 };
200 
201 struct spdk_nvmf_tcp_qpair {
202 	struct spdk_nvmf_qpair			qpair;
203 	struct spdk_nvmf_tcp_poll_group		*group;
204 	struct spdk_nvmf_tcp_port		*port;
205 	struct spdk_sock			*sock;
206 	struct spdk_poller			*flush_poller;
207 
208 	enum nvme_tcp_pdu_recv_state		recv_state;
209 	enum nvme_tcp_qpair_state		state;
210 
211 	struct nvme_tcp_pdu			pdu_in_progress;
212 
213 	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
214 	TAILQ_HEAD(, nvme_tcp_pdu)		free_queue;
215 
216 	struct nvme_tcp_pdu			*pdu;
217 	struct nvme_tcp_pdu			*pdu_pool;
218 	uint16_t				free_pdu_num;
219 
220 	/* Queues to track the requests in all states */
221 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		state_queue[TCP_REQUEST_NUM_STATES];
222 	/* Number of requests in each state */
223 	int32_t					state_cntr[TCP_REQUEST_NUM_STATES];
224 
225 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		queued_c2h_data_tcp_req;
226 
227 	uint8_t					cpda;
228 
229 	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
230 	 * buffers to be used for in capsule data.
231 	 */
232 	void					*buf;
233 	void					*bufs;
234 	struct spdk_nvmf_tcp_req		*req;
235 	struct spdk_nvmf_tcp_req		*reqs;
236 
237 	bool					host_hdgst_enable;
238 	bool					host_ddgst_enable;
239 
240 
241 	/* The maximum number of I/O outstanding on this connection at one time */
242 	uint16_t				max_queue_depth;
243 
244 
245 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
246 	uint32_t				maxh2cdata;
247 
248 	uint32_t				c2h_data_pdu_cnt;
249 
250 	/* IP address */
251 	char					initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
252 	char					target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
253 
254 	/* IP port */
255 	uint16_t				initiator_port;
256 	uint16_t				target_port;
257 
258 	/* Timer used to destroy qpair after detecting transport error issue if initiator does
259 	 *  not close the connection.
260 	 */
261 	struct spdk_poller			*timeout_poller;
262 
263 	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
264 };
265 
266 struct spdk_nvmf_tcp_poll_group {
267 	struct spdk_nvmf_transport_poll_group	group;
268 	struct spdk_sock_group			*sock_group;
269 
270 	/* Requests that are waiting to obtain a data buffer */
271 	TAILQ_HEAD(, spdk_nvmf_tcp_req)		pending_data_buf_queue;
272 
273 	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
274 };
275 
276 struct spdk_nvmf_tcp_port {
277 	struct spdk_nvme_transport_id		trid;
278 	struct spdk_sock			*listen_sock;
279 	uint32_t				ref;
280 	TAILQ_ENTRY(spdk_nvmf_tcp_port)		link;
281 };
282 
283 struct spdk_nvmf_tcp_transport {
284 	struct spdk_nvmf_transport		transport;
285 
286 	pthread_mutex_t				lock;
287 
288 	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
289 };
290 
291 static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
292 		struct spdk_nvmf_tcp_qpair *tqpair);
293 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
294 				      struct spdk_nvmf_tcp_req *tcp_req);
295 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
296 
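/* Move a request to a new state: unlink it from the queue and counter of its
 * current state, then append it to the queue of the target state.
 */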
297 static void
298 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
299 			    enum spdk_nvmf_tcp_req_state state)
300 {
301 	struct spdk_nvmf_qpair *qpair;
302 	struct spdk_nvmf_tcp_qpair *tqpair;
303 
304 	qpair = tcp_req->req.qpair;
305 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
306 
307 	TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
308 	tqpair->state_cntr[tcp_req->state]--;
309 	assert(tqpair->state_cntr[tcp_req->state] >= 0);
310 
311 	TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
312 	tqpair->state_cntr[state]++;
313 
314 	tcp_req->state = state;
315 }
316 
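/* Take a PDU from the tqpair's free queue. The pool is sized so that it
 * should never run empty; if it does, it is treated as a fatal logic error.
 */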
317 static struct nvme_tcp_pdu *
318 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair)
319 {
320 	struct nvme_tcp_pdu *pdu;
321 
322 	pdu = TAILQ_FIRST(&tqpair->free_queue);
323 	if (!pdu) {
324 		SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair);
325 		abort();
326 		return NULL;
327 	}
328 
329 	tqpair->free_pdu_num--;
330 	TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq);
331 	memset(pdu, 0, sizeof(*pdu));
332 	pdu->ref = 1;
333 
334 	return pdu;
335 }
336 
337 static void
338 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
339 {
340 	if (!pdu) {
341 		return;
342 	}
343 
344 	assert(pdu->ref > 0);
345 
346 	pdu->ref--;
347 	if (pdu->ref == 0) {
348 		tqpair->free_pdu_num++;
349 		TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq);
350 	}
351 }
352 
353 static struct spdk_nvmf_tcp_req *
354 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
355 {
356 	struct spdk_nvmf_tcp_req *tcp_req;
357 
358 	tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
359 	if (!tcp_req) {
360 		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
361 		return NULL;
362 	}
363 
364 	memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd));
365 	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
366 	tcp_req->next_expected_r2t_offset = 0;
367 	tcp_req->r2tl_remain = 0;
368 	tcp_req->c2h_data_offset = 0;
369 	tcp_req->has_incapsule_data = false;
370 	tcp_req->dif_insert_or_strip = false;
371 
372 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
373 	return tcp_req;
374 }
375 
376 static void
377 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
378 {
379 	struct spdk_nvmf_tcp_transport *ttransport;
380 
381 	if (!tcp_req) {
382 		return;
383 	}
384 
385 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
386 	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
387 				      struct spdk_nvmf_tcp_transport, transport);
388 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
389 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
390 }
391 
392 static int
393 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req)
394 {
395 	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
396 
397 	nvmf_tcp_request_free(tcp_req);
398 
399 	return 0;
400 }
401 
402 static void
403 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
404 				enum spdk_nvmf_tcp_req_state state)
405 {
406 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
407 
408 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
409 		nvmf_tcp_request_free(tcp_req);
410 	}
411 }
412 
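/* Release every PDU still queued for transmission and free all requests that
 * are in a transient state so that the qpair can be destroyed safely.
 */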
413 static void
414 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
415 {
416 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
417 	struct nvme_tcp_pdu *pdu, *tmp_pdu;
418 
419 	/* Free the pdus in the send_queue */
420 	TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) {
421 		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
422 		/* Also check the pdu type, we need to keep the c2h_data_pdu_cnt accurate */
423 		if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) {
424 			assert(tqpair->c2h_data_pdu_cnt > 0);
425 			tqpair->c2h_data_pdu_cnt--;
426 		}
427 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
428 	}
429 
430 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->queued_c2h_data_tcp_req, link, req_tmp) {
431 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
432 	}
433 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
434 
435 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
436 
437 	/* Wipe the requests waiting for buffer from the global list */
438 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
439 			   req_tmp) {
440 		TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
441 	}
442 
443 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
444 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
445 	spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
446 }
447 
448 static void
449 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
450 {
451 	int i;
452 	struct spdk_nvmf_tcp_req *tcp_req;
453 
454 	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
455 	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
456 		SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]);
457 		TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
458 			SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->data_from_pool);
459 			SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
460 		}
461 	}
462 }
463 
464 static void
465 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
466 {
467 	int err = 0;
468 
469 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
470 
471 	spdk_poller_unregister(&tqpair->flush_poller);
472 	spdk_sock_close(&tqpair->sock);
473 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
474 
475 	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
476 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
477 			    tqpair->free_pdu_num,
478 			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
479 		err++;
480 	}
481 
482 	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
483 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
484 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
485 			    tqpair->max_queue_depth);
486 		err++;
487 	}
488 
489 	if (tqpair->c2h_data_pdu_cnt != 0) {
490 		SPDK_ERRLOG("tqpair(%p) outstanding c2h_data_pdu cnt is %u but should be 0\n", tqpair,
491 			    tqpair->c2h_data_pdu_cnt);
492 		err++;
493 	}
494 
495 	if (err > 0) {
496 		nvmf_tcp_dump_qpair_req_contents(tqpair);
497 	}
498 	free(tqpair->pdu);
499 	free(tqpair->pdu_pool);
500 	free(tqpair->req);
501 	free(tqpair->reqs);
502 	spdk_free(tqpair->buf);
503 	spdk_free(tqpair->bufs);
504 	free(tqpair);
505 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
506 }
507 
508 static int
509 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
510 {
511 	struct spdk_nvmf_tcp_transport	*ttransport;
512 
513 	assert(transport != NULL);
514 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
515 
516 	pthread_mutex_destroy(&ttransport->lock);
517 	free(ttransport);
518 	return 0;
519 }
520 
521 static struct spdk_nvmf_transport *
522 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
523 {
524 	struct spdk_nvmf_tcp_transport *ttransport;
525 	uint32_t sge_count;
526 	uint32_t min_shared_buffers;
527 
528 	ttransport = calloc(1, sizeof(*ttransport));
529 	if (!ttransport) {
530 		return NULL;
531 	}
532 
533 	TAILQ_INIT(&ttransport->ports);
534 
535 	ttransport->transport.ops = &spdk_nvmf_transport_tcp;
536 
537 	SPDK_NOTICELOG("*** TCP Transport Init ***\n");
538 
539 	SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
540 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
541 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
542 		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
543 		     "  num_shared_buffers=%d, c2h_success=%d,\n"
544 		     "  dif_insert_or_strip=%d, sock_priority=%d\n",
545 		     opts->max_queue_depth,
546 		     opts->max_io_size,
547 		     opts->max_qpairs_per_ctrlr,
548 		     opts->io_unit_size,
549 		     opts->in_capsule_data_size,
550 		     opts->max_aq_depth,
551 		     opts->num_shared_buffers,
552 		     opts->c2h_success,
553 		     opts->dif_insert_or_strip,
554 		     opts->sock_priority);
555 
556 	if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
557 		SPDK_ERRLOG("Unsupported socket_priority=%d, the valid range is: 0 to %d\n"
558 			    "see 'man 7 socket' for the range of priority under the SO_PRIORITY item\n",
559 			    opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
560 		free(ttransport);
561 		return NULL;
562 	}
563 
564 	/* I/O unit size cannot be larger than max I/O size */
565 	if (opts->io_unit_size > opts->max_io_size) {
566 		opts->io_unit_size = opts->max_io_size;
567 	}
568 
569 	sge_count = opts->max_io_size / opts->io_unit_size;
570 	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
571 		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
572 		free(ttransport);
573 		return NULL;
574 	}
575 
576 	min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
577 	if (min_shared_buffers > opts->num_shared_buffers) {
578 		SPDK_ERRLOG("There are not enough buffers to satisfy "
579 			    "per-poll group caches for each thread. (%" PRIu32 ") "
580 			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
581 		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
582 		spdk_nvmf_tcp_destroy(&ttransport->transport);
583 		return NULL;
584 	}
585 
586 	pthread_mutex_init(&ttransport->lock, NULL);
587 
588 	return &ttransport->transport;
589 }
590 
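/* Parse a trsvcid string into a TCP port number. Returns -1 if the string is
 * not a valid number in [0, 65535].
 */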
591 static int
592 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid)
593 {
594 	unsigned long long ull;
595 	char *end = NULL;
596 
597 	ull = strtoull(trsvcid, &end, 10);
598 	if (end == NULL || end == trsvcid || *end != '\0') {
599 		return -1;
600 	}
601 
602 	/* Valid TCP/IP port numbers are in [0, 65535] */
603 	if (ull > 65535) {
604 		return -1;
605 	}
606 
607 	return (int)ull;
608 }
609 
610 /**
611  * Canonicalize a listen address trid.
612  */
613 static int
614 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
615 				 const struct spdk_nvme_transport_id *trid)
616 {
617 	int trsvcid_int;
618 
619 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
620 	if (trsvcid_int < 0) {
621 		return -EINVAL;
622 	}
623 
624 	memset(canon_trid, 0, sizeof(*canon_trid));
625 	canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP;
626 	canon_trid->adrfam = trid->adrfam;
627 	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
628 	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
629 
630 	return 0;
631 }
632 
633 /**
634  * Find an existing listening port.
635  *
636  * Caller must hold ttransport->lock.
637  */
638 static struct spdk_nvmf_tcp_port *
639 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
640 			 const struct spdk_nvme_transport_id *trid)
641 {
642 	struct spdk_nvme_transport_id canon_trid;
643 	struct spdk_nvmf_tcp_port *port;
644 
645 	if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
646 		return NULL;
647 	}
648 
649 	TAILQ_FOREACH(port, &ttransport->ports, link) {
650 		if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) {
651 			return port;
652 		}
653 	}
654 
655 	return NULL;
656 }
657 
658 static int
659 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
660 		     const struct spdk_nvme_transport_id *trid)
661 {
662 	struct spdk_nvmf_tcp_transport *ttransport;
663 	struct spdk_nvmf_tcp_port *port;
664 	int trsvcid_int;
665 	uint8_t adrfam;
666 
667 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
668 
669 	trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid);
670 	if (trsvcid_int < 0) {
671 		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
672 		return -EINVAL;
673 	}
674 
675 	pthread_mutex_lock(&ttransport->lock);
676 
677 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
678 	if (port) {
679 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n",
680 			      trid->traddr, trid->trsvcid);
681 		port->ref++;
682 		pthread_mutex_unlock(&ttransport->lock);
683 		return 0;
684 	}
685 
686 	port = calloc(1, sizeof(*port));
687 	if (!port) {
688 		SPDK_ERRLOG("Port allocation failed\n");
689 		free(port);
690 		pthread_mutex_unlock(&ttransport->lock);
691 		return -ENOMEM;
692 	}
693 
694 	port->ref = 1;
695 
696 	if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) {
697 		SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n",
698 			    trid->traddr, trid->trsvcid);
699 		free(port);
700 		pthread_mutex_unlock(&ttransport->lock);
701 		return -EINVAL;
702 	}
703 
704 	port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int);
705 	if (port->listen_sock == NULL) {
706 		SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
707 			    trid->traddr, trsvcid_int,
708 			    spdk_strerror(errno), errno);
709 		free(port);
710 		pthread_mutex_unlock(&ttransport->lock);
711 		return -errno;
712 	}
713 
714 	if (spdk_sock_is_ipv4(port->listen_sock)) {
715 		adrfam = SPDK_NVMF_ADRFAM_IPV4;
716 	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
717 		adrfam = SPDK_NVMF_ADRFAM_IPV6;
718 	} else {
719 		SPDK_ERRLOG("Unhandled socket type\n");
720 		adrfam = 0;
721 	}
722 
723 	if (adrfam != trid->adrfam) {
724 		SPDK_ERRLOG("Socket address family mismatch\n");
725 		spdk_sock_close(&port->listen_sock);
726 		free(port);
727 		pthread_mutex_unlock(&ttransport->lock);
728 		return -EINVAL;
729 	}
730 
731 	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n",
732 		       trid->traddr, trsvcid_int);
733 
734 	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
735 	pthread_mutex_unlock(&ttransport->lock);
736 
737 	return 0;
738 }
739 
740 static int
741 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
742 			  const struct spdk_nvme_transport_id *trid)
743 {
744 	struct spdk_nvmf_tcp_transport *ttransport;
745 	struct spdk_nvmf_tcp_port *port;
746 	int rc;
747 
748 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
749 
750 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
751 		      trid->traddr, trid->trsvcid);
752 
753 	pthread_mutex_lock(&ttransport->lock);
754 	port = _spdk_nvmf_tcp_find_port(ttransport, trid);
755 	if (port) {
756 		assert(port->ref > 0);
757 		port->ref--;
758 		if (port->ref == 0) {
759 			TAILQ_REMOVE(&ttransport->ports, port, link);
760 			spdk_sock_close(&port->listen_sock);
761 			free(port);
762 		}
763 		rc = 0;
764 	} else {
765 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n");
766 		rc = -ENOENT;
767 	}
768 	pthread_mutex_unlock(&ttransport->lock);
769 
770 	return rc;
771 }
772 
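/* Gather iovecs for the PDUs at the head of the send_queue, write them to the
 * socket with a single writev, then complete fully written PDUs and record the
 * progress of a partially written one in its writev_offset.
 * Returns 0 when the send_queue is empty, 1 if data remains, -1 on error.
 */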
773 static int
774 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
775 {
776 	const int array_size = 32;
777 	struct iovec iovs[array_size];
778 	int iovcnt = 0;
779 	int bytes = 0;
780 	int total_length = 0;
781 	uint32_t mapped_length;
782 	struct nvme_tcp_pdu *pdu;
783 	int pdu_length;
784 	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
785 	struct spdk_nvmf_tcp_transport *ttransport;
786 
787 	pdu = TAILQ_FIRST(&tqpair->send_queue);
788 
789 	if (pdu == NULL) {
790 		return 0;
791 	}
792 
793 	/*
794 	 * Build up a list of iovecs for the first few PDUs in the
795 	 *  tqpair's send_queue.
796 	 */
797 	while (pdu != NULL && ((array_size - iovcnt) >= 3)) {
798 		iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt],
799 					      array_size - iovcnt,
800 					      pdu,
801 					      tqpair->host_hdgst_enable,
802 					      tqpair->host_ddgst_enable,
803 					      &mapped_length);
804 		total_length += mapped_length;
805 		pdu = TAILQ_NEXT(pdu, tailq);
806 	}
807 
808 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt);
809 
810 	bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
811 	if (bytes == -1) {
812 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
813 			return 1;
814 		} else {
815 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
816 				    errno, spdk_strerror(errno));
817 			return -1;
818 		}
819 	}
820 
821 	spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0);
822 
823 	pdu = TAILQ_FIRST(&tqpair->send_queue);
824 
825 	/*
826 	 * Free any PDUs that were fully written.  If a PDU was only
827 	 *  partially written, update its writev_offset so that next
828 	 *  time only the unwritten portion will be sent to writev().
829 	 */
830 	TAILQ_INIT(&completed_pdus_list);
831 	while (bytes > 0) {
832 		pdu_length = pdu->hdr.common.plen - pdu->writev_offset;
833 		if (bytes >= pdu_length) {
834 			bytes -= pdu_length;
835 			TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
836 			TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
837 			pdu = TAILQ_FIRST(&tqpair->send_queue);
838 
839 		} else {
840 			pdu->writev_offset += bytes;
841 			bytes = 0;
842 		}
843 	}
844 
845 	while (!TAILQ_EMPTY(&completed_pdus_list)) {
846 		pdu = TAILQ_FIRST(&completed_pdus_list);
847 		TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
848 		assert(pdu->cb_fn != NULL);
849 		pdu->cb_fn(pdu->cb_arg);
850 		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
851 	}
852 
853 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
854 	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);
855 
856 	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
857 }
858 
859 static int
860 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair)
861 {
862 	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
863 	int rc;
864 
865 	if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) {
866 		rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
867 		if (rc == 0 && tqpair->flush_poller != NULL) {
868 			spdk_poller_unregister(&tqpair->flush_poller);
869 		} else if (rc == 1 && tqpair->flush_poller == NULL) {
870 			tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus,
871 					       tqpair, 50);
872 		}
873 	} else {
874 		/*
875 		 * If the tqpair state is not RUNNING, then
876 		 * keep trying to flush PDUs until our list is
877 		 * empty - to make sure all data is sent before
878 		 * closing the connection.
879 		 */
880 		do {
881 			rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair);
882 		} while (rc == 1);
883 	}
884 
885 	if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) {
886 		/*
887 		 * If the poller has already started destruction of the tqpair,
888 		 *  i.e. the socket read failed, then the connection state may already
889 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
890 		 */
891 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
892 	}
893 
894 	return -1;
895 }
896 
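/* Fill in the header and data digests if the host enabled them (IC_RESP and
 * C2H_TERM_REQ PDUs are always sent without digests), queue the PDU on the
 * send_queue and try to flush it immediately.
 */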
897 static void
898 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
899 			      struct nvme_tcp_pdu *pdu,
900 			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
901 			      void *cb_arg)
902 {
903 	int enable_digest;
904 	int hlen;
905 	uint32_t crc32c;
906 
907 	hlen = pdu->hdr.common.hlen;
908 	enable_digest = 1;
909 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
910 	    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
911 		/* this PDU should be sent without digest */
912 		enable_digest = 0;
913 	}
914 
915 	/* Header Digest */
916 	if (enable_digest && tqpair->host_hdgst_enable) {
917 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
918 		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
919 	}
920 
921 	/* Data Digest */
922 	if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
923 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
924 		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
925 	}
926 
927 	pdu->cb_fn = cb_fn;
928 	pdu->cb_arg = cb_arg;
929 	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
930 	spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
931 }
932 
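/* Allocate the requests, in-capsule data buffers and PDUs for this qpair.
 * Before the connection is fully established (sq_head_max == 0) only a single
 * request plus a small PDU pool is needed; afterwards 'size' of each is
 * allocated.
 */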
933 static int
934 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
935 {
936 	int i;
937 	struct spdk_nvmf_tcp_req *tcp_req;
938 	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
939 	uint32_t in_capsule_data_size;
940 
941 	in_capsule_data_size = transport->opts.in_capsule_data_size;
942 	if (transport->opts.dif_insert_or_strip) {
943 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
944 	}
945 
946 	if (!tqpair->qpair.sq_head_max) {
947 		tqpair->req = calloc(1, sizeof(*tqpair->req));
948 		if (!tqpair->req) {
949 			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
950 			return -1;
951 		}
952 
953 		if (in_capsule_data_size) {
954 			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
955 						   NULL, SPDK_ENV_LCORE_ID_ANY,
956 						   SPDK_MALLOC_DMA);
957 			if (!tqpair->buf) {
958 				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
959 				return -1;
960 			}
961 		}
962 
963 		tcp_req = tqpair->req;
964 		tcp_req->ttag = 0;
965 		tcp_req->req.qpair = &tqpair->qpair;
966 
967 		/* Set up memory to receive commands */
968 		if (tqpair->buf) {
969 			tcp_req->buf = tqpair->buf;
970 		}
971 
972 		/* Set the cmd and rsp */
973 		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
974 		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
975 
976 		/* Initialize request state to FREE */
977 		tcp_req->state = TCP_REQUEST_STATE_FREE;
978 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
979 
980 		tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu));
981 		if (!tqpair->pdu) {
982 			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
983 			return -1;
984 		}
985 
986 		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
987 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
988 		}
989 
990 	} else {
991 		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
992 		if (!tqpair->reqs) {
993 			SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
994 			return -1;
995 		}
996 
997 		if (in_capsule_data_size) {
998 			tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
999 						    NULL, SPDK_ENV_LCORE_ID_ANY,
1000 						    SPDK_MALLOC_DMA);
1001 			if (!tqpair->bufs) {
1002 				SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
1003 				return -1;
1004 			}
1005 		}
1006 
1007 		for (i = 0; i < size; i++) {
1008 			struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
1009 
1010 			tcp_req->ttag = i + 1;
1011 			tcp_req->req.qpair = &tqpair->qpair;
1012 
1013 			/* Set up memory to receive commands */
1014 			if (tqpair->bufs) {
1015 				tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
1016 			}
1017 
1018 			/* Set the cmd and rsp */
1019 			tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
1020 			tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
1021 
1022 			/* Initialize request state to FREE */
1023 			tcp_req->state = TCP_REQUEST_STATE_FREE;
1024 			TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
1025 		}
1026 
1027 		tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
1028 		if (!tqpair->pdu_pool) {
1029 			SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
1030 			return -1;
1031 		}
1032 
1033 		for (i = 0; i < size; i++) {
1034 			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
1035 		}
1036 	}
1037 
1038 	return 0;
1039 }
1040 
1041 static int
1042 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
1043 {
1044 	struct spdk_nvmf_tcp_qpair *tqpair;
1045 	int i;
1046 
1047 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1048 
1049 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
1050 
1051 	TAILQ_INIT(&tqpair->send_queue);
1052 	TAILQ_INIT(&tqpair->free_queue);
1053 	TAILQ_INIT(&tqpair->queued_c2h_data_tcp_req);
1054 
1055 	/* Initialise request state queues of the qpair */
1056 	for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
1057 		TAILQ_INIT(&tqpair->state_queue[i]);
1058 	}
1059 
1060 	tqpair->host_hdgst_enable = true;
1061 	tqpair->host_ddgst_enable = true;
1062 
1063 	return 0;
1064 }
1065 
1066 static int
1067 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
1068 {
1069 
1070 	int rc;
1071 	int buf_size;
1072 
1073 	/* set recv buffer size */
1074 	buf_size = 2 * 1024 * 1024;
1075 	rc = spdk_sock_set_recvbuf(tqpair->sock, buf_size);
1076 	if (rc != 0) {
1077 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
1078 		return rc;
1079 	}
1080 
1081 	/* set send buffer size */
1082 	rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size);
1083 	if (rc != 0) {
1084 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
1085 		return rc;
1086 	}
1087 
1088 	/* set low water mark */
1089 	rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr));
1090 	if (rc != 0) {
1091 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
1092 		return rc;
1093 	}
1094 
1095 	return 0;
1096 }
1097 
1098 static void
1099 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
1100 			      struct spdk_nvmf_tcp_port *port,
1101 			      struct spdk_sock *sock, new_qpair_fn cb_fn)
1102 {
1103 	struct spdk_nvmf_tcp_qpair *tqpair;
1104 	int rc;
1105 
1106 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
1107 		      port->trid.traddr, port->trid.trsvcid);
1108 
1109 	if (transport->opts.sock_priority) {
1110 		rc = spdk_sock_set_priority(sock, transport->opts.sock_priority);
1111 		if (rc) {
1112 			SPDK_ERRLOG("Failed to set the priority of the socket\n");
1113 			spdk_sock_close(&sock);
1114 			return;
1115 		}
1116 	}
1117 
1118 	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
1119 	if (tqpair == NULL) {
1120 		SPDK_ERRLOG("Could not allocate new connection.\n");
1121 		spdk_sock_close(&sock);
1122 		return;
1123 	}
1124 
1125 	tqpair->sock = sock;
1126 	tqpair->max_queue_depth = 1;
1127 	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
1128 	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
1129 	tqpair->port = port;
1130 	tqpair->qpair.transport = transport;
1131 
1132 	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
1133 			       sizeof(tqpair->target_addr), &tqpair->target_port,
1134 			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
1135 			       &tqpair->initiator_port);
1136 	if (rc < 0) {
1137 		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
1138 		spdk_nvmf_tcp_qpair_destroy(tqpair);
1139 		return;
1140 	}
1141 
1142 	cb_fn(&tqpair->qpair);
1143 }
1144 
1145 static void
1146 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port,
1147 			  new_qpair_fn cb_fn)
1148 {
1149 	struct spdk_sock *sock;
1150 	int i;
1151 
1152 	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
1153 		sock = spdk_sock_accept(port->listen_sock);
1154 		if (sock) {
1155 			_spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn);
1156 		}
1157 	}
1158 }
1159 
1160 static void
1161 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
1162 {
1163 	struct spdk_nvmf_tcp_transport *ttransport;
1164 	struct spdk_nvmf_tcp_port *port;
1165 
1166 	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
1167 
1168 	TAILQ_FOREACH(port, &ttransport->ports, link) {
1169 		spdk_nvmf_tcp_port_accept(transport, port, cb_fn);
1170 	}
1171 }
1172 
1173 static void
1174 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
1175 		       struct spdk_nvme_transport_id *trid,
1176 		       struct spdk_nvmf_discovery_log_page_entry *entry)
1177 {
1178 	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
1179 	entry->adrfam = trid->adrfam;
1180 	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED;
1181 
1182 	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
1183 	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
1184 
1185 	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
1186 }
1187 
1188 static struct spdk_nvmf_transport_poll_group *
1189 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
1190 {
1191 	struct spdk_nvmf_tcp_poll_group *tgroup;
1192 
1193 	tgroup = calloc(1, sizeof(*tgroup));
1194 	if (!tgroup) {
1195 		return NULL;
1196 	}
1197 
1198 	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
1199 	if (!tgroup->sock_group) {
1200 		goto cleanup;
1201 	}
1202 
1203 	TAILQ_INIT(&tgroup->qpairs);
1204 	TAILQ_INIT(&tgroup->pending_data_buf_queue);
1205 
1206 	return &tgroup->group;
1207 
1208 cleanup:
1209 	free(tgroup);
1210 	return NULL;
1211 }
1212 
1213 static struct spdk_nvmf_transport_poll_group *
1214 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1215 {
1216 	struct spdk_nvmf_tcp_qpair *tqpair;
1217 	struct spdk_sock_group *group = NULL;
1218 	int rc;
1219 
1220 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
1221 	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
1222 	if (!rc && group != NULL) {
1223 		return spdk_sock_group_get_ctx(group);
1224 	}
1225 
1226 	return NULL;
1227 }
1228 
1229 static void
1230 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
1231 {
1232 	struct spdk_nvmf_tcp_poll_group *tgroup;
1233 
1234 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1235 	spdk_sock_group_close(&tgroup->sock_group);
1236 
1237 	if (!TAILQ_EMPTY(&tgroup->pending_data_buf_queue)) {
1238 		SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n");
1239 	}
1240 
1241 	free(tgroup);
1242 }
1243 
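/* Record the new PDU receive state. Entering READY or ERROR resets the
 * in-progress PDU so the next receive starts from a clean slate.
 */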
1244 static void
1245 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1246 				   enum nvme_tcp_pdu_recv_state state)
1247 {
1248 	if (tqpair->recv_state == state) {
1249 		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state(%d) to be set\n",
1250 			    tqpair, state);
1251 		return;
1252 	}
1253 
1254 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1255 	tqpair->recv_state = state;
1256 
1257 	switch (state) {
1258 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1259 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1260 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1261 		break;
1262 	case NVME_TCP_PDU_RECV_STATE_ERROR:
1263 	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1264 		memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1265 		break;
1266 	default:
1267 		SPDK_ERRLOG("The state(%d) is invalid\n", state);
1268 		abort();
1269 		break;
1270 	}
1271 }
1272 
1273 static int
1274 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx)
1275 {
1276 	struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1277 
1278 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1279 
1280 	SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair,
1281 		    SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
1282 	tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
1283 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
1284 	spdk_poller_unregister(&tqpair->timeout_poller);
1285 	spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
1286 
1287 	return 0;
1288 }
1289 
1290 static void
1291 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
1292 {
1293 	struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1294 
1295 	if (!tqpair->timeout_poller) {
1296 		tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair,
1297 					 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1298 	}
1299 }
1300 
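/* Build and queue a C2H Termination Request PDU carrying the fatal error
 * status, the field offset where applicable, and a copy of the offending PDU
 * header, then put the qpair into the error receive state.
 */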
1301 static void
1302 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1303 				enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1304 {
1305 	struct nvme_tcp_pdu *rsp_pdu;
1306 	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1307 	uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1308 	uint32_t copy_len;
1309 
1310 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1311 	if (!rsp_pdu) {
1312 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1313 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1314 		return;
1315 	}
1316 
1317 	c2h_term_req = &rsp_pdu->hdr.term_req;
1318 	c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1319 	c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1320 
1321 	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1322 	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1323 		DSET32(&c2h_term_req->fei, error_offset);
1324 	}
1325 
1326 	copy_len = pdu->hdr.common.hlen;
1327 	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1328 		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1329 	}
1330 
1331 	/* Copy the error info into the buffer */
1332 	memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
1333 	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);
1334 
1335 	/* Contains the header of the offending received pdu */
1336 	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
1337 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1338 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair);
1339 }
1340 
1341 static void
1342 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1343 				     struct spdk_nvmf_tcp_qpair *tqpair,
1344 				     struct nvme_tcp_pdu *pdu)
1345 {
1346 	struct spdk_nvmf_tcp_req *tcp_req;
1347 
1348 	tcp_req = spdk_nvmf_tcp_req_get(tqpair);
1349 	if (!tcp_req) {
1350 		SPDK_ERRLOG("Cannot allocate tcp_req\n");
1351 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1352 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1353 		return;
1354 	}
1355 
1356 	pdu->ctx = tcp_req;
1357 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
1358 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1359 	return;
1360 }
1361 
1362 static void
1363 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1364 		struct spdk_nvmf_tcp_qpair *tqpair,
1365 		struct nvme_tcp_pdu *pdu)
1366 {
1367 	struct spdk_nvmf_tcp_req *tcp_req;
1368 	struct spdk_nvme_tcp_cmd *capsule_cmd;
1369 	uint32_t error_offset = 0;
1370 	enum spdk_nvme_tcp_term_req_fes fes;
1371 
1372 	capsule_cmd = &pdu->hdr.capsule_cmd;
1373 	tcp_req = pdu->ctx;
1374 	assert(tcp_req != NULL);
1375 	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1376 		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
1377 			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1378 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1379 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1380 		goto err;
1381 	}
1382 
1383 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1384 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1385 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1386 
1387 	return;
1388 err:
1389 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1390 }
1391 
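/* Validate an incoming H2C Data PDU header: locate the matching request by
 * cccid/ttag, then check the data offset, length and maxh2cdata limits. On
 * success the request's iovecs are set up to receive the payload; on any
 * violation a C2H termination request is sent instead.
 */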
1392 static void
1393 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1394 				  struct spdk_nvmf_tcp_qpair *tqpair,
1395 				  struct nvme_tcp_pdu *pdu)
1396 {
1397 	struct spdk_nvmf_tcp_req *tcp_req;
1398 	uint32_t error_offset = 0;
1399 	enum spdk_nvme_tcp_term_req_fes fes = 0;
1400 	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1401 	bool ttag_offset_error = false;
1402 
1403 	h2c_data = &pdu->hdr.h2c_data;
1404 
1405 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1406 		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1407 
1408 	/* Use the information in the pdu to find the matching req */
1409 	TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER],
1410 		      state_link) {
1411 		if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) {
1412 			break;
1413 		}
1414 
1415 		if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) {
1416 			ttag_offset_error = true;
1417 		}
1418 	}
1419 
1420 	if (!tcp_req) {
1421 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1422 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
1423 		if (!ttag_offset_error) {
1424 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1425 		} else {
1426 			error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1427 		}
1428 		goto err;
1429 	}
1430 
1431 	if (tcp_req->next_expected_r2t_offset != h2c_data->datao) {
1432 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1433 			      "tcp_req(%p), tqpair=%p,  expected_r2t_offset=%u, but data offset =%u\n",
1434 			      tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao);
1435 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1436 		goto err;
1437 	}
1438 
1439 	if (h2c_data->datal > tqpair->maxh2cdata) {
1440 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p,  datal=%u exceeds maxh2cdata size=%u\n",
1441 			      tcp_req, tqpair, h2c_data->datal, tqpair->maxh2cdata);
1442 		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1443 		goto err;
1444 	}
1445 
1446 	if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1447 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1448 			      "tcp_req(%p), tqpair=%p,  (datao=%u + datal=%u) exceeds requested length=%u\n",
1449 			      tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
1450 		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1451 		goto err;
1452 	}
1453 
1454 	pdu->ctx = tcp_req;
1455 
1456 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
1457 		pdu->dif_ctx = &tcp_req->dif_ctx;
1458 	}
1459 
1460 	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1461 				  h2c_data->datao, h2c_data->datal);
1462 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1463 	return;
1464 
1465 err:
1466 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1467 }
1468 
1469 static void
1470 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg)
1471 {
1472 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1473 	nvmf_tcp_request_free(tcp_req);
1474 }
1475 
1476 static void
1477 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1478 				    struct spdk_nvmf_tcp_qpair *tqpair)
1479 {
1480 	struct nvme_tcp_pdu *rsp_pdu;
1481 	struct spdk_nvme_tcp_rsp *capsule_resp;
1482 
1483 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
1484 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1485 	if (!rsp_pdu) {
1486 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1487 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1488 		return;
1489 	}
1490 
1491 	capsule_resp = &rsp_pdu->hdr.capsule_resp;
1492 	capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1493 	capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1494 	capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1495 	if (tqpair->host_hdgst_enable) {
1496 		capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1497 		capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1498 	}
1499 
1500 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req);
1501 }
1502 
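/* Completion callback for a C2H Data PDU. When the last PDU of a request has
 * been sent, either free the request (if the C2H success optimization is on)
 * or send the capsule response, then service the pending C2H data queue.
 */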
1503 static void
1504 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
1505 {
1506 	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1507 	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1508 					     struct spdk_nvmf_tcp_qpair, qpair);
1509 
1510 	assert(tqpair != NULL);
1511 	assert(tcp_req->c2h_data_pdu_num > 0);
1512 	tcp_req->c2h_data_pdu_num--;
1513 	if (!tcp_req->c2h_data_pdu_num) {
1514 		if (tqpair->qpair.transport->opts.c2h_success) {
1515 			nvmf_tcp_request_free(tcp_req);
1516 		} else {
1517 			spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
1518 		}
1519 	}
1520 
1521 	tqpair->c2h_data_pdu_cnt--;
1522 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
1523 }
1524 
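/* Build and queue an R2T PDU asking the host for the next chunk of data,
 * starting at next_expected_r2t_offset and limited to maxh2cdata bytes.
 */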
1525 static void
1526 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1527 			   struct spdk_nvmf_tcp_req *tcp_req)
1528 {
1529 	struct nvme_tcp_pdu *rsp_pdu;
1530 	struct spdk_nvme_tcp_r2t_hdr *r2t;
1531 
1532 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1533 	if (!rsp_pdu) {
1534 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1535 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1536 		return;
1537 	}
1538 
1539 	r2t = &rsp_pdu->hdr.r2t;
1540 	r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1541 	r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1542 
1543 	if (tqpair->host_hdgst_enable) {
1544 		r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1545 		r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1546 	}
1547 
1548 	r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1549 	r2t->ttag = tcp_req->ttag;
1550 	r2t->r2to = tcp_req->next_expected_r2t_offset;
1551 	r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata);
1552 	tcp_req->r2tl_remain = r2t->r2tl;
1553 
1554 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1555 		      "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1556 		      tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
1557 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL);
1558 }
1559 
1560 static void
1561 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1562 				      struct spdk_nvmf_tcp_qpair *tqpair,
1563 				      struct nvme_tcp_pdu *pdu)
1564 {
1565 	struct spdk_nvmf_tcp_req *tcp_req;
1566 
1567 	tcp_req = pdu->ctx;
1568 	assert(tcp_req != NULL);
1569 
1570 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1571 
1572 	tcp_req->next_expected_r2t_offset += pdu->data_len;
1573 	tcp_req->r2tl_remain -= pdu->data_len;
1574 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1575 
1576 	if (!tcp_req->r2tl_remain) {
1577 		if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) {
1578 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1579 			spdk_nvmf_tcp_req_process(ttransport, tcp_req);
1580 		} else {
1581 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
1582 			spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
1583 		}
1584 	}
1585 }
1586 
1587 static void
1588 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
1589 {
1590 	SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1591 		    spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1592 	if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1593 	    (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1594 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1595 			      DGET32(h2c_term_req->fei));
1596 	}
1597 }
1598 
1599 static void
1600 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1601 				      struct nvme_tcp_pdu *pdu)
1602 {
1603 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1604 	uint32_t error_offset = 0;
1605 	enum spdk_nvme_tcp_term_req_fes fes;
1606 
1607 
1608 	if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1609 		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1610 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1611 		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1612 		goto end;
1613 	}
1614 
1615 	/* set the data buffer */
1616 	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen,
1617 			      h2c_term_req->common.plen - h2c_term_req->common.hlen);
1618 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1619 	return;
1620 end:
1621 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1622 	return;
1623 }
1624 
1625 static void
1626 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1627 		struct nvme_tcp_pdu *pdu)
1628 {
1629 	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1630 
1631 	spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1632 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1633 	return;
1634 }
1635 
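/* Called once the payload of the in-progress PDU has been fully received:
 * verify the data digest when enabled, then dispatch by PDU type.
 */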
1636 static void
1637 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1638 {
1639 	int rc = 0;
1640 	struct nvme_tcp_pdu *pdu;
1641 	uint32_t crc32c, error_offset = 0;
1642 	enum spdk_nvme_tcp_term_req_fes fes;
1643 	struct spdk_nvmf_tcp_transport *ttransport;
1644 
1645 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1646 	pdu = &tqpair->pdu_in_progress;
1647 
1648 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1649 	/* check data digest if need */
1650 	/* check data digest if needed */
1651 		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1652 		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1653 		if (rc == 0) {
1654 			SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1655 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1656 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1657 			return;
1658 
1659 		}
1660 	}
1661 
1662 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1663 	switch (pdu->hdr.common.pdu_type) {
1664 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1665 		spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
1666 		break;
1667 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1668 		spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
1669 		break;
1670 
1671 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1672 		spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
1673 		break;
1674 
1675 	default:
1676 		/* The code should never reach here */
1677 		SPDK_ERRLOG("The code should not reach here\n");
1678 		break;
1679 	}
1680 }
1681 
1682 static void
1683 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg)
1684 {
1685 	struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1686 
1687 	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1688 }
1689 
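/*
 * Handle an ICReq PDU and send the corresponding ICResp. Only PDU format
 * version 0 is accepted. The host's header/data digest preferences are
 * recorded, the target's CPDA is derived from the host's HPDA (capped at
 * SPDK_NVME_TCP_CPDA_MAX), and MAXH2CDATA is advertised as the smaller of
 * NVMF_TCP_PDU_MAX_H2C_DATA_SIZE and the configured io_unit_size.
 */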
1690 static void
1691 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1692 			   struct spdk_nvmf_tcp_qpair *tqpair,
1693 			   struct nvme_tcp_pdu *pdu)
1694 {
1695 	struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
1696 	struct nvme_tcp_pdu *rsp_pdu;
1697 	struct spdk_nvme_tcp_ic_resp *ic_resp;
1698 	uint32_t error_offset = 0;
1699 	enum spdk_nvme_tcp_term_req_fes fes;
1700 
1701 	/* Only PFV 0 is defined currently */
1702 	if (ic_req->pfv != 0) {
1703 		SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1704 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1705 		error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1706 		goto end;
1707 	}
1708 
1709 	/* MAXR2T is 0's based */
1710 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1711 
1712 	tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
1713 	tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
1714 
1715 	tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1716 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1717 
1718 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
1719 	if (!rsp_pdu) {
1720 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1721 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1722 		return;
1723 	}
1724 
1725 	ic_resp = &rsp_pdu->hdr.ic_resp;
1726 	ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1727 	ic_resp->common.hlen = ic_resp->common.plen =  sizeof(*ic_resp);
1728 	ic_resp->pfv = 0;
1729 	ic_resp->cpda = tqpair->cpda;
1730 	tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE,
1731 				      ttransport->transport.opts.io_unit_size);
1732 	ic_resp->maxh2cdata = tqpair->maxh2cdata;
1733 	ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1734 	ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1735 
1736 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1737 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1738 
1739 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair);
1740 	spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1741 	return;
1742 end:
1743 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1744 	return;
1745 }
1746 
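/*
 * Called once the PDU-specific header has been read. Verify the header
 * digest (when present) and dispatch the PDU by type; unknown types are
 * rejected with a C2H termination request.
 */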
1747 static void
1748 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1749 {
1750 	struct nvme_tcp_pdu *pdu;
1751 	int rc;
1752 	uint32_t crc32c, error_offset = 0;
1753 	enum spdk_nvme_tcp_term_req_fes fes;
1754 	struct spdk_nvmf_tcp_transport *ttransport;
1755 
1756 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1757 	pdu = &tqpair->pdu_in_progress;
1758 
1759 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1760 		      pdu->hdr.common.pdu_type);
1761 	/* check header digest if needed */
1762 	if (pdu->has_hdgst) {
1763 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1764 		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1765 		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1766 		if (rc == 0) {
1767 			SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1768 			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1769 			spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1770 			return;
1771 
1772 		}
1773 	}
1774 
1775 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
1776 	switch (pdu->hdr.common.pdu_type) {
1777 	case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
1778 		spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
1779 		break;
1780 	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1781 		spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
1782 		break;
1783 	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1784 		spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
1785 		break;
1786 
1787 	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1788 		spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
1789 		break;
1790 
1791 	default:
1792 		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type);
1793 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1794 		error_offset = 1;
1795 		spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1796 		break;
1797 	}
1798 }
1799 
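/*
 * Validate the common header of the PDU in progress. An ICReq is only legal
 * as the very first PDU on the connection; all other types require the
 * connection to be in the RUNNING state. The expected hlen, the PDO (which,
 * for a non-zero CPDA, must equal (cpda + 1) * 4 since CPDA is expressed in
 * 4-byte units) and plen are checked per PDU type, and any violation results
 * in a C2H termination request.
 */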
1800 static void
1801 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
1802 {
1803 	struct nvme_tcp_pdu *pdu;
1804 	uint32_t error_offset = 0;
1805 	enum spdk_nvme_tcp_term_req_fes fes;
1806 	uint8_t expected_hlen, pdo;
1807 	bool plen_error = false, pdo_error = false;
1808 
1809 	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1810 	pdu = &tqpair->pdu_in_progress;
1811 
1812 	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1813 		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1814 			SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
1815 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1816 			goto err;
1817 		}
1818 		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1819 		if (pdu->hdr.common.plen != expected_hlen) {
1820 			plen_error = true;
1821 		}
1822 	} else {
1823 		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1824 			SPDK_ERRLOG("The TCP/IP connection is not negotiated\n");
1825 			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1826 			goto err;
1827 		}
1828 
1829 		switch (pdu->hdr.common.pdu_type) {
1830 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1831 			expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1832 			pdo = pdu->hdr.common.pdo;
1833 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1834 				pdo_error = true;
1835 				break;
1836 			}
1837 
1838 			if (pdu->hdr.common.plen < expected_hlen) {
1839 				plen_error = true;
1840 			}
1841 			break;
1842 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1843 			expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1844 			pdo = pdu->hdr.common.pdo;
1845 			if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1846 				pdo_error = true;
1847 				break;
1848 			}
1849 			if (pdu->hdr.common.plen < expected_hlen) {
1850 				plen_error = true;
1851 			}
1852 			break;
1853 
1854 		case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1855 			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1856 			if ((pdu->hdr.common.plen <= expected_hlen) ||
1857 			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1858 				plen_error = true;
1859 			}
1860 			break;
1861 
1862 		default:
1863 			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
1864 			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1865 			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1866 			goto err;
1867 		}
1868 	}
1869 
1870 	if (pdu->hdr.common.hlen != expected_hlen) {
1871 		SPDK_ERRLOG("PDU type=0x%02x, Expected header length %u, got %u on tqpair=%p\n",
1872 			    pdu->hdr.common.pdu_type,
1873 			    expected_hlen, pdu->hdr.common.hlen, tqpair);
1874 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1875 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1876 		goto err;
1877 	} else if (pdo_error) {
1878 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1879 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1880 	} else if (plen_error) {
1881 		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1882 		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1883 		goto err;
1884 	} else {
1885 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1886 		return;
1887 	}
1888 err:
1889 	spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1890 }
1891 
1892 static int
1893 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1894 				int read_len)
1895 {
1896 	int rc;
1897 
1898 	rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1899 				      read_offset, read_len, pdu->dif_ctx);
1900 	if (rc != 0) {
1901 		SPDK_ERRLOG("DIF generate failed\n");
1902 	}
1903 
1904 	return rc;
1905 }
1906 
1907 #define MAX_NVME_TCP_PDU_LOOP_COUNT 32
1908 
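/*
 * Per-socket receive state machine: read the common header, then the
 * PDU-specific header (plus header digest and padding when applicable), and
 * then the payload. The loop keeps running while the receive state keeps
 * changing, but is capped at MAX_NVME_TCP_PDU_LOOP_COUNT PDUs per call so
 * that a single busy connection does not monopolize the poller.
 */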
1909 static int
1910 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
1911 {
1912 	int rc = 0;
1913 	struct nvme_tcp_pdu *pdu;
1914 	enum nvme_tcp_pdu_recv_state prev_state;
1915 	uint32_t data_len, current_pdu_num = 0;
1916 	uint8_t psh_len, pdo, hlen;
1917 	int8_t  padding_len;
1918 
1919 	/* The loop here is to allow for several back-to-back state changes. */
1920 	do {
1921 		prev_state = tqpair->recv_state;
1922 
1923 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
1924 
1925 		switch (tqpair->recv_state) {
1926 		/* Wait for the common header  */
1927 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1928 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1929 			pdu = &tqpair->pdu_in_progress;
1930 
1931 			rc = nvme_tcp_read_data(tqpair->sock,
1932 						sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1933 						(void *)&pdu->hdr.common + pdu->ch_valid_bytes);
1934 			if (rc < 0) {
1935 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
1936 				return NVME_TCP_PDU_FATAL;
1937 			} else if (rc > 0) {
1938 				pdu->ch_valid_bytes += rc;
1939 				spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1940 				if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
1941 					spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1942 				}
1943 			}
1944 
1945 			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1946 				return NVME_TCP_PDU_IN_PROGRESS;
1947 			}
1948 
1949 			/* The common header of this PDU has now been read from the socket. */
1950 			spdk_nvmf_tcp_pdu_ch_handle(tqpair);
1951 			break;
1952 		/* Wait for the pdu specific header  */
1953 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1954 			pdu = &tqpair->pdu_in_progress;
1955 			psh_len = hlen = pdu->hdr.common.hlen;
1956 			/* Only capsule_cmd and h2c_data have a header digest */
1957 			if (((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD) ||
1958 			     (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_DATA)) &&
1959 			    tqpair->host_hdgst_enable) {
1960 				pdu->has_hdgst = true;
1961 				psh_len += SPDK_NVME_TCP_DIGEST_LEN;
1962 				if (pdu->hdr.common.plen > psh_len) {
1963 					pdo = pdu->hdr.common.pdo;
1964 					padding_len = pdo - psh_len;
1965 					SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "padding length is %d for pdu=%p on tqpair=%p\n", padding_len,
1966 						      pdu, tqpair);
1967 					if (padding_len > 0) {
1968 						psh_len = pdo;
1969 					}
1970 				}
1971 			}
1972 
1973 			psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
1974 			/* The following will read psh + hdgst (if present) + padding (if present) */
1975 			if (pdu->psh_valid_bytes < psh_len) {
1976 				rc = nvme_tcp_read_data(tqpair->sock,
1977 							psh_len - pdu->psh_valid_bytes,
1978 							(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
1979 				if (rc < 0) {
1980 					return NVME_TCP_PDU_FATAL;
1981 				} else if (rc > 0) {
1982 					spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
1983 							  0, rc, 0, 0);
1984 					pdu->psh_valid_bytes += rc;
1985 				}
1986 				if (pdu->psh_valid_bytes < psh_len) {
1987 					return NVME_TCP_PDU_IN_PROGRESS;
1988 				}
1989 			}
1990 
1991 			/* All headers (ch, psh, header digest) of this PDU have now been read from the socket. */
1992 			spdk_nvmf_tcp_pdu_psh_handle(tqpair);
1993 			if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
1994 				current_pdu_num++;
1995 			}
1996 			break;
1997 		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1998 			pdu = &tqpair->pdu_in_progress;
1999 
2000 			/* If no payload length has been set up for this PDU, there is nothing to read yet; just return */
2001 			if (!pdu->data_len) {
2002 				return NVME_TCP_PDU_IN_PROGRESS;
2003 			}
2004 
2005 			data_len = pdu->data_len;
2006 			/* data digest */
2007 			if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
2008 					  tqpair->host_ddgst_enable)) {
2009 				data_len += SPDK_NVME_TCP_DIGEST_LEN;
2010 				pdu->ddgst_enable = true;
2011 			}
2012 
2013 			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2014 			if (rc < 0) {
2015 				return NVME_TCP_PDU_IN_PROGRESS;
2016 			}
2017 			pdu->readv_offset += rc;
2018 
2019 			if (spdk_unlikely(pdu->dif_ctx != NULL)) {
2020 				rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
2021 				if (rc != 0) {
2022 					return NVME_TCP_PDU_FATAL;
2023 				}
2024 			}
2025 
2026 			if (pdu->readv_offset < data_len) {
2027 				return NVME_TCP_PDU_IN_PROGRESS;
2028 			}
2029 
2030 			/* All of this PDU has now been read from the socket. */
2031 			spdk_nvmf_tcp_pdu_payload_handle(tqpair);
2032 			current_pdu_num++;
2033 			break;
2034 		case NVME_TCP_PDU_RECV_STATE_ERROR:
2035 			pdu = &tqpair->pdu_in_progress;
2036 			/* Check whether the connection is closed. We only read 1 byte at a time. */
2037 			rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr.common);
2038 			if (rc < 0) {
2039 				return NVME_TCP_PDU_FATAL;
2040 			}
2041 			break;
2042 		default:
2043 			assert(0);
2044 			SPDK_ERRLOG("The code should not reach here\n");
2045 			break;
2046 		}
2047 	} while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT));
2048 
2049 	return rc;
2050 }
2051 
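/*
 * Derive the data transfer direction of a request from its opcode (or from
 * the fabrics command type for fabric commands). A few admin Get/Set Features
 * selections are special-cased, and any command whose SGL describes a
 * zero-length buffer is reported as SPDK_NVME_DATA_NONE.
 */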
2052 static enum spdk_nvme_data_transfer
2053 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) {
2054 	enum spdk_nvme_data_transfer xfer;
2055 	struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd;
2056 	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
2057 
2058 	/* Figure out data transfer direction */
2059 	if (cmd->opc == SPDK_NVME_OPC_FABRIC)
2060 	{
2061 		xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype);
2062 	} else
2063 	{
2064 		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2065 
2066 		/* Some admin commands are special cases */
2067 		if ((tcp_req->req.qpair->qid == 0) &&
2068 		    ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) ||
2069 		     (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) {
2070 			switch (cmd->cdw10 & 0xff) {
2071 			case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
2072 			case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
2073 			case SPDK_NVME_FEAT_HOST_IDENTIFIER:
2074 				break;
2075 			default:
2076 				xfer = SPDK_NVME_DATA_NONE;
2077 			}
2078 		}
2079 	}
2080 
2081 	if (xfer == SPDK_NVME_DATA_NONE)
2082 	{
2083 		return xfer;
2084 	}
2085 
2086 	/* Even for commands that may transfer data, they could have specified 0 length.
2087 	 * We want those to show up with xfer SPDK_NVME_DATA_NONE.
2088 	 */
2089 	switch (sgl->generic.type)
2090 	{
2091 	case SPDK_NVME_SGL_TYPE_DATA_BLOCK:
2092 	case SPDK_NVME_SGL_TYPE_BIT_BUCKET:
2093 	case SPDK_NVME_SGL_TYPE_SEGMENT:
2094 	case SPDK_NVME_SGL_TYPE_LAST_SEGMENT:
2095 	case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK:
2096 		if (sgl->unkeyed.length == 0) {
2097 			xfer = SPDK_NVME_DATA_NONE;
2098 		}
2099 		break;
2100 	case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK:
2101 		if (sgl->keyed.length == 0) {
2102 			xfer = SPDK_NVME_DATA_NONE;
2103 		}
2104 		break;
2105 	}
2106 
2107 	return xfer;
2108 }
2109 
2110 static void
2111 spdk_nvmf_tcp_request_free_buffers(struct spdk_nvmf_tcp_req *tcp_req,
2112 				   struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport)
2113 {
2114 	for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
2115 		assert(tcp_req->buffers[i] != NULL);
2116 		if (group->buf_cache_count < group->buf_cache_size) {
2117 			STAILQ_INSERT_HEAD(&group->buf_cache,
2118 					   (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link);
2119 			group->buf_cache_count++;
2120 		} else {
2121 			spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]);
2122 		}
2123 		tcp_req->req.iov[i].iov_base = NULL;
2124 		tcp_req->buffers[i] = NULL;
2125 		tcp_req->req.iov[i].iov_len = 0;
2126 	}
2127 	tcp_req->data_from_pool = false;
2128 }
2129 
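/*
 * Populate the request's iovec array with data buffers, taking them from the
 * poll group's buffer cache when possible and falling back to the shared
 * transport mempool otherwise. Each iovec is at most io_unit_size bytes and
 * the buffer address is aligned using NVMF_DATA_BUFFER_MASK. On allocation
 * failure, all buffers acquired so far are returned and -ENOMEM is reported.
 */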
2130 static int
2131 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
2132 			    struct spdk_nvmf_tcp_req *tcp_req, uint32_t length)
2133 {
2134 	void					*buf = NULL;
2135 	uint32_t				i = 0;
2136 	struct spdk_nvmf_tcp_qpair		*tqpair;
2137 	struct spdk_nvmf_transport_poll_group	*group;
2138 
2139 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2140 	group = &tqpair->group->group;
2141 
2142 	tcp_req->req.iovcnt = 0;
2143 	while (length) {
2144 		if (!(STAILQ_EMPTY(&group->buf_cache))) {
2145 			group->buf_cache_count--;
2146 			buf = STAILQ_FIRST(&group->buf_cache);
2147 			STAILQ_REMOVE_HEAD(&group->buf_cache, link);
2148 		} else {
2149 			buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
2150 			if (!buf) {
2151 				goto nomem;
2152 			}
2153 		}
2154 
2155 		tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
2156 							~NVMF_DATA_BUFFER_MASK);
2157 		tcp_req->req.iov[i].iov_len  = spdk_min(length, ttransport->transport.opts.io_unit_size);
2158 		tcp_req->req.iovcnt++;
2159 		tcp_req->buffers[i] = buf;
2160 		length -= tcp_req->req.iov[i].iov_len;
2161 		i++;
2162 	}
2163 
2164 	assert(tcp_req->req.iovcnt <= SPDK_NVMF_MAX_SGL_ENTRIES);
2165 	tcp_req->data_from_pool = true;
2166 	return 0;
2167 
2168 nomem:
2169 	spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2170 	tcp_req->req.iovcnt = 0;
2171 	return -ENOMEM;
2172 }
2173 
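/*
 * Parse the command's SGL descriptor. Two forms are supported: a transport
 * data block (the data is carried in separate H2C/C2H data PDUs, so buffers
 * are allocated from the pool) and a data block with the offset subtype
 * (in-capsule data located at the given offset within the capsule buffer).
 * Length and offset violations set an NVMe status code and fail the request.
 */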
2174 static int
2175 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_transport *ttransport,
2176 			    struct spdk_nvmf_tcp_req *tcp_req)
2177 {
2178 	struct spdk_nvme_cmd			*cmd;
2179 	struct spdk_nvme_cpl			*rsp;
2180 	struct spdk_nvme_sgl_descriptor		*sgl;
2181 	uint32_t				length;
2182 
2183 	cmd = &tcp_req->req.cmd->nvme_cmd;
2184 	rsp = &tcp_req->req.rsp->nvme_cpl;
2185 	sgl = &cmd->dptr.sgl1;
2186 
2187 	length = sgl->unkeyed.length;
2188 
2189 	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
2190 	    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
2191 		if (length > ttransport->transport.opts.max_io_size) {
2192 			SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
2193 				    length, ttransport->transport.opts.max_io_size);
2194 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2195 			return -1;
2196 		}
2197 
2198 		/* fill request length and populate iovs */
2199 		tcp_req->req.length = length;
2200 
2201 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
2202 
2203 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2204 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2205 			tcp_req->elba_length = length;
2206 		}
2207 
2208 		if (spdk_nvmf_tcp_req_fill_iovs(ttransport, tcp_req, length) < 0) {
2209 			/* No available buffers. Queue this request up. */
2210 			SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
2211 				      tcp_req);
2212 			return 0;
2213 		}
2214 
2215 		/* backward compatible */
2216 		tcp_req->req.data = tcp_req->req.iov[0].iov_base;
2217 
2218 
2219 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n",
2220 			      tcp_req,
2221 			      tcp_req->req.iovcnt, tcp_req->req.data);
2222 
2223 		return 0;
2224 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
2225 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
2226 		uint64_t offset = sgl->address;
2227 		uint32_t max_len = ttransport->transport.opts.in_capsule_data_size;
2228 
2229 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
2230 			      offset, length);
2231 
2232 		if (offset > max_len) {
2233 			SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
2234 				    offset, max_len);
2235 			rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
2236 			return -1;
2237 		}
2238 		max_len -= (uint32_t)offset;
2239 
2240 		if (length > max_len) {
2241 			SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
2242 				    length, max_len);
2243 			rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
2244 			return -1;
2245 		}
2246 
2247 		tcp_req->req.data = tcp_req->buf + offset;
2248 		tcp_req->data_from_pool = false;
2249 		tcp_req->req.length = length;
2250 
2251 		if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2252 			length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx);
2253 			tcp_req->elba_length = length;
2254 		}
2255 
2256 		tcp_req->req.iov[0].iov_base = tcp_req->req.data;
2257 		tcp_req->req.iov[0].iov_len = length;
2258 		tcp_req->req.iovcnt = 1;
2259 
2260 		return 0;
2261 	}
2262 
2263 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
2264 		    sgl->generic.type, sgl->generic.subtype);
2265 	rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
2266 	return -1;
2267 }
2268 
2269 static int
2270 nvmf_tcp_pdu_verify_dif(struct nvme_tcp_pdu *pdu,
2271 			const struct spdk_dif_ctx *dif_ctx)
2272 {
2273 	struct spdk_dif_error err_blk = {};
2274 	int rc;
2275 
2276 	rc = spdk_dif_verify_stream(pdu->data_iov, pdu->data_iovcnt,
2277 				    0, pdu->data_len, pdu->dif_ctx, &err_blk);
2278 	if (rc != 0) {
2279 		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2280 			    err_blk.err_type, err_blk.err_offset);
2281 	}
2282 
2283 	return rc;
2284 }
2285 
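/*
 * Build and queue one C2H data PDU for the request, carrying at most
 * NVMF_TCP_PDU_MAX_C2H_DATA_SIZE bytes starting at the current c2h_data
 * offset. Padding is inserted to honor the host's CPDA alignment, digests are
 * added when negotiated, and the LAST_PDU (and, when c2h_success is enabled,
 * SUCCESS) flags are set on the final PDU of the transfer.
 */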
2286 static void
2287 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
2288 			    struct spdk_nvmf_tcp_req *tcp_req)
2289 {
2290 	struct nvme_tcp_pdu *rsp_pdu;
2291 	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
2292 	uint32_t plen, pdo, alignment;
2293 	int rc;
2294 
2295 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2296 
2297 	rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair);
2298 	assert(rsp_pdu != NULL);
2299 
2300 	c2h_data = &rsp_pdu->hdr.c2h_data;
2301 	c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
2302 	plen = c2h_data->common.hlen = sizeof(*c2h_data);
2303 
2304 	if (tqpair->host_hdgst_enable) {
2305 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2306 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
2307 	}
2308 
2309 	/* set the psh */
2310 	c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
2311 	c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE,
2312 				   tcp_req->req.length - tcp_req->c2h_data_offset);
2313 	c2h_data->datao = tcp_req->c2h_data_offset;
2314 
2315 	/* set the padding */
2316 	rsp_pdu->padding_len = 0;
2317 	pdo = plen;
2318 	if (tqpair->cpda) {
2319 		alignment = (tqpair->cpda + 1) << 2;
2320 		if (alignment > plen) {
2321 			rsp_pdu->padding_len = alignment - plen;
2322 			pdo = plen = alignment;
2323 		}
2324 	}
2325 
2326 	c2h_data->common.pdo = pdo;
2327 	plen += c2h_data->datal;
2328 	if (tqpair->host_ddgst_enable) {
2329 		c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
2330 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2331 	}
2332 
2333 	c2h_data->common.plen = plen;
2334 
2335 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2336 		rsp_pdu->dif_ctx = &tcp_req->dif_ctx;
2337 	}
2338 
2339 	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2340 				  c2h_data->datao, c2h_data->datal);
2341 
2342 	if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2343 		rc = nvmf_tcp_pdu_verify_dif(rsp_pdu, rsp_pdu->dif_ctx);
2344 		if (rc != 0) {
2345 			/* A data digest error detected by the NVMe/TCP target is treated as a
2346 			 * non-fatal transport error because the cause lies outside the NVMe/TCP
2347 			 * target.
2348 			 * On the other hand, a DIF check error is treated as a fatal transport
2349 			 * error here because the error is caused by the target itself. A fatal
2350 			 * NVMe/TCP transport error is handled by terminating the connection.
2351 			 */
2352 			tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
2353 			return;
2354 		}
2355 	}
2356 
2357 	tcp_req->c2h_data_offset += c2h_data->datal;
2358 	if (tcp_req->c2h_data_offset == tcp_req->req.length) {
2359 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
2360 		c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2361 		if (tqpair->qpair.transport->opts.c2h_success) {
2362 			c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
2363 		}
2364 		TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2365 	}
2366 
2367 	tqpair->c2h_data_pdu_cnt += 1;
2368 	spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req);
2369 }
2370 
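/*
 * Number of C2H data PDUs needed for the request: the request length divided
 * by the per-PDU maximum, rounded up. For example, with the 128 KiB per-PDU
 * maximum, a transfer of 128 KiB + 1 byte requires two PDUs.
 */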
2371 static int
2372 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req)
2373 {
2374 	return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
2375 	       NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
2376 }
2377 
2378 static void
2379 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
2380 {
2381 	struct spdk_nvmf_tcp_req *tcp_req;
2382 
2383 	while (!TAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
2384 	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
2385 		tcp_req = TAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
2386 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
2387 	}
2388 }
2389 
2390 static void
2391 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
2392 			     struct spdk_nvmf_tcp_qpair *tqpair)
2393 {
2394 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
2395 
2396 	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
2397 
2398 	TAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
2399 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
2400 }
2401 
2402 static int
2403 request_transfer_out(struct spdk_nvmf_request *req)
2404 {
2405 	struct spdk_nvmf_tcp_req	*tcp_req;
2406 	struct spdk_nvmf_qpair		*qpair;
2407 	struct spdk_nvmf_tcp_qpair	*tqpair;
2408 	struct spdk_nvme_cpl		*rsp;
2409 
2410 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2411 
2412 	qpair = req->qpair;
2413 	rsp = &req->rsp->nvme_cpl;
2414 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2415 
2416 	/* Advance our sq_head pointer */
2417 	if (qpair->sq_head == qpair->sq_head_max) {
2418 		qpair->sq_head = 0;
2419 	} else {
2420 		qpair->sq_head++;
2421 	}
2422 	rsp->sqhd = qpair->sq_head;
2423 
2424 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2425 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2426 	if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
2427 	    req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2428 		spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair);
2429 	} else {
2430 		spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2431 	}
2432 
2433 	return 0;
2434 }
2435 
2436 static void
2437 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair,
2438 				   struct spdk_nvmf_tcp_req *tcp_req)
2439 {
2440 	struct nvme_tcp_pdu *pdu;
2441 
2442 	if (tcp_req->data_from_pool) {
2443 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2444 		tcp_req->next_expected_r2t_offset = 0;
2445 		spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2446 		spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2447 	} else {
2448 		pdu = &tqpair->pdu_in_progress;
2449 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req,
2450 			      tqpair);
2451 		/* No need to send an r2t; the data is contained in the capsule */
2452 		nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2453 					  0, tcp_req->req.length);
2454 		spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2455 		spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2456 	}
2457 }
2458 
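/*
 * Mark the request as carrying in-capsule data when plen differs from the
 * bare header length (plus header digest, when enabled), i.e. when the
 * command capsule includes a data portion.
 */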
2459 static void
2460 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2461 				 struct spdk_nvmf_tcp_req *tcp_req)
2462 {
2463 	struct nvme_tcp_pdu *pdu;
2464 	uint32_t plen = 0;
2465 
2466 	pdu = &tqpair->pdu_in_progress;
2467 	plen = pdu->hdr.common.hlen;
2468 
2469 	if (tqpair->host_hdgst_enable) {
2470 		plen += SPDK_NVME_TCP_DIGEST_LEN;
2471 	}
2472 
2473 	if (pdu->hdr.common.plen != plen) {
2474 		tcp_req->has_incapsule_data = true;
2475 	}
2476 }
2477 
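/*
 * Request state machine. Each call advances the request through as many
 * back-to-back state transitions as possible and returns whether any progress
 * was made; states that wait on external events (buffer availability, bdev
 * completion, PDU transmission) are left for other code paths to advance.
 */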
2478 static bool
2479 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2480 			  struct spdk_nvmf_tcp_req *tcp_req)
2481 {
2482 	struct spdk_nvmf_tcp_qpair		*tqpair;
2483 	struct spdk_nvme_cpl			*rsp = &tcp_req->req.rsp->nvme_cpl;
2484 	int					rc;
2485 	enum spdk_nvmf_tcp_req_state		prev_state;
2486 	bool					progress = false;
2487 	struct spdk_nvmf_transport_poll_group	*group;
2488 
2489 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2490 	group = &tqpair->group->group;
2491 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2492 
2493 	/* The loop here is to allow for several back-to-back state changes. */
2494 	do {
2495 		prev_state = tcp_req->state;
2496 
2497 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2498 			      tqpair);
2499 
2500 		switch (tcp_req->state) {
2501 		case TCP_REQUEST_STATE_FREE:
2502 			/* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2503 			 * to escape this state. */
2504 			break;
2505 		case TCP_REQUEST_STATE_NEW:
2506 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2507 
2508 			/* copy the cmd from the received pdu */
2509 			tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe;
2510 
2511 			if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->dif_ctx))) {
2512 				tcp_req->dif_insert_or_strip = true;
2513 				tqpair->pdu_in_progress.dif_ctx = &tcp_req->dif_ctx;
2514 			}
2515 
2516 			/* The next state transition depends on the data transfer needs of this request. */
2517 			tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req);
2518 
2519 			/* If no data to transfer, ready to execute. */
2520 			if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2521 				/* Reset the tqpair receiving pdu state */
2522 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2523 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2524 				break;
2525 			}
2526 
2527 			spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
2528 
2529 			if (!tcp_req->has_incapsule_data) {
2530 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2531 			}
2532 
2533 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2534 			TAILQ_INSERT_TAIL(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2535 			break;
2536 		case TCP_REQUEST_STATE_NEED_BUFFER:
2537 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2538 
2539 			assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2540 
2541 			if (!tcp_req->has_incapsule_data &&
2542 			    (tcp_req != TAILQ_FIRST(&tqpair->group->pending_data_buf_queue))) {
2543 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2544 					      "Not the first element waiting for a buffer for tcp_req(%p) on tqpair=%p\n",
2545 					      tcp_req, tqpair);
2546 				/* This request needs to wait in line to obtain a buffer */
2547 				break;
2548 			}
2549 
2550 			/* Try to get a data buffer */
2551 			rc = spdk_nvmf_tcp_req_parse_sgl(ttransport, tcp_req);
2552 			if (rc < 0) {
2553 				TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2554 				rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2555 				/* Reset the tqpair receiving pdu state */
2556 				spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2557 				spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2558 				break;
2559 			}
2560 
2561 			if (!tcp_req->req.data) {
2562 				SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2563 					      tcp_req, tqpair);
2564 				/* No buffers available. */
2565 				break;
2566 			}
2567 
2568 			TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link);
2569 
2570 			/* If data is transferring from host to controller, fetch it from the host first. */
2571 			if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
2572 				spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req);
2573 				break;
2574 			}
2575 
2576 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2577 			break;
2578 		case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2579 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2580 					  (uintptr_t)tcp_req, 0);
2581 			/* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2582 			 * to escape this state. */
2583 			break;
2584 		case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2585 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
2586 
2587 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2588 				assert(tcp_req->elba_length >= tcp_req->req.length);
2589 				tcp_req->orig_length = tcp_req->req.length;
2590 				tcp_req->req.length = tcp_req->elba_length;
2591 			}
2592 
2593 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
2594 			spdk_nvmf_request_exec(&tcp_req->req);
2595 			break;
2596 		case TCP_REQUEST_STATE_EXECUTING:
2597 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2598 			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2599 			 * to escape this state. */
2600 			break;
2601 		case TCP_REQUEST_STATE_EXECUTED:
2602 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
2603 
2604 			if (spdk_unlikely(tcp_req->dif_insert_or_strip)) {
2605 				tcp_req->req.length = tcp_req->orig_length;
2606 			}
2607 
2608 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2609 			break;
2610 		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2611 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2612 			rc = request_transfer_out(&tcp_req->req);
2613 			assert(rc == 0); /* No good way to handle this currently */
2614 			break;
2615 		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2616 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2617 					  (uintptr_t)tcp_req,
2618 					  0);
2619 			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2620 			 * to escape this state. */
2621 			break;
2622 		case TCP_REQUEST_STATE_COMPLETED:
2623 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
2624 			if (tcp_req->data_from_pool) {
2625 				spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
2626 			}
2627 			tcp_req->req.length = 0;
2628 			tcp_req->req.iovcnt = 0;
2629 			tcp_req->req.data = NULL;
2630 			spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
2631 			break;
2632 		case TCP_REQUEST_NUM_STATES:
2633 		default:
2634 			assert(0);
2635 			break;
2636 		}
2637 
2638 		if (tcp_req->state != prev_state) {
2639 			progress = true;
2640 		}
2641 	} while (tcp_req->state != prev_state);
2642 
2643 	return progress;
2644 }
2645 
2646 static void
2647 spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
2648 				    struct spdk_nvmf_tcp_qpair *tqpair)
2649 {
2650 	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
2651 
2652 	/* The tqpair is not in a good state, so just return */
2653 	if (spdk_unlikely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR)) {
2654 		return;
2655 	}
2656 
2657 
2658 	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->group->pending_data_buf_queue, link, req_tmp) {
2659 		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2660 			break;
2661 		}
2662 	}
2663 }
2664 
2665 static void
2666 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
2667 {
2668 	struct spdk_nvmf_tcp_qpair *tqpair = arg;
2669 	struct spdk_nvmf_tcp_transport *ttransport;
2670 	int rc;
2671 
2672 	assert(tqpair != NULL);
2673 
2674 	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
2675 	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);
2676 	rc = spdk_nvmf_tcp_sock_process(tqpair);
2677 
2678 	/* Disconnect if either of the following holds:
2679 	 * rc < 0: the socket was closed
2680 	 * tqpair state: the tqpair entered the EXITING state due to an internal error
2681 	 */
2682 	if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) {
2683 		tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2684 		spdk_nvmf_tcp_qpair_flush_pdus(tqpair);
2685 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect the tqpair=%p\n", tqpair);
2686 		spdk_poller_unregister(&tqpair->timeout_poller);
2687 		spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
2688 	}
2689 }
2690 
2691 static int
2692 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2693 			     struct spdk_nvmf_qpair *qpair)
2694 {
2695 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2696 	struct spdk_nvmf_tcp_qpair	*tqpair;
2697 	int				rc;
2698 
2699 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2700 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2701 
2702 	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
2703 				      spdk_nvmf_tcp_sock_cb, tqpair);
2704 	if (rc != 0) {
2705 		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2706 			    spdk_strerror(errno), errno);
2707 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2708 		return -1;
2709 	}
2710 
2711 	rc =  spdk_nvmf_tcp_qpair_sock_init(tqpair);
2712 	if (rc != 0) {
2713 		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
2714 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2715 		return -1;
2716 	}
2717 
2718 	rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair);
2719 	if (rc < 0) {
2720 		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
2721 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2722 		return -1;
2723 	}
2724 
2725 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
2726 	if (rc < 0) {
2727 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
2728 		spdk_nvmf_tcp_qpair_destroy(tqpair);
2729 		return -1;
2730 	}
2731 
2732 	tqpair->group = tgroup;
2733 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2734 	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2735 
2736 	return 0;
2737 }
2738 
2739 static int
2740 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2741 				struct spdk_nvmf_qpair *qpair)
2742 {
2743 	struct spdk_nvmf_tcp_poll_group	*tgroup;
2744 	struct spdk_nvmf_tcp_qpair		*tqpair;
2745 	int				rc;
2746 
2747 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2748 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2749 
2750 	assert(tqpair->group == tgroup);
2751 
2752 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
2753 	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2754 	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2755 	if (rc != 0) {
2756 		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2757 			    spdk_strerror(errno), errno);
2758 	}
2759 
2760 	return rc;
2761 }
2762 
2763 static int
2764 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
2765 {
2766 	struct spdk_nvmf_tcp_transport *ttransport;
2767 	struct spdk_nvmf_tcp_req *tcp_req;
2768 
2769 	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2770 	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2771 
2772 	spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2773 	spdk_nvmf_tcp_req_process(ttransport, tcp_req);
2774 
2775 	return 0;
2776 }
2777 
2778 static void
2779 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
2780 {
2781 	SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2782 
2783 	spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair));
2784 }
2785 
2786 static int
2787 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2788 {
2789 	struct spdk_nvmf_tcp_poll_group *tgroup;
2790 	int rc;
2791 
2792 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2793 
2794 	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) {
2795 		return 0;
2796 	}
2797 
2798 	rc = spdk_sock_group_poll(tgroup->sock_group);
2799 	if (rc < 0) {
2800 		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
2801 		return rc;
2802 	}
2803 
2804 	return 0;
2805 }
2806 
2807 static int
2808 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2809 			     struct spdk_nvme_transport_id *trid, bool peer)
2810 {
2811 	struct spdk_nvmf_tcp_qpair     *tqpair;
2812 	uint16_t			port;
2813 
2814 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2815 	trid->trtype = SPDK_NVME_TRANSPORT_TCP;
2816 
2817 	if (peer) {
2818 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2819 		port = tqpair->initiator_port;
2820 	} else {
2821 		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2822 		port = tqpair->target_port;
2823 	}
2824 
2825 	if (spdk_sock_is_ipv4(tqpair->sock)) {
2826 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
2827 	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
2828 		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2829 	} else {
2830 		return -1;
2831 	}
2832 
2833 	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2834 	return 0;
2835 }
2836 
2837 static int
2838 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2839 				   struct spdk_nvme_transport_id *trid)
2840 {
2841 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2842 }
2843 
2844 static int
2845 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2846 				  struct spdk_nvme_transport_id *trid)
2847 {
2848 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1);
2849 }
2850 
2851 static int
2852 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2853 				    struct spdk_nvme_transport_id *trid)
2854 {
2855 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2856 }
2857 
2858 static int
2859 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
2860 {
2861 	struct spdk_nvmf_tcp_qpair     *tqpair;
2862 	int rc;
2863 	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2864 
2865 	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
2866 	if (!rc) {
2867 		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
2868 		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
2869 		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
2870 		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
2871 			      tqpair->max_queue_depth, tqpair);
2872 	}
2873 
2874 	return rc;
2875 
2876 }
2877 
2878 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2879 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2880 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2881 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2882 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2883 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2884 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2885 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
2886 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2887 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2888 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2889 
2890 static void
2891 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
2892 {
2893 	opts->max_queue_depth =		SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2894 	opts->max_qpairs_per_ctrlr =	SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2895 	opts->in_capsule_data_size =	SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2896 	opts->max_io_size =		SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2897 	opts->io_unit_size =		SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2898 	opts->max_aq_depth =		SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2899 	opts->num_shared_buffers =	SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2900 	opts->buf_cache_size =		SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
2901 	opts->c2h_success =		SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2902 	opts->dif_insert_or_strip =	SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2903 	opts->sock_priority =		SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2904 }
2905 
2906 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
2907 	.type = SPDK_NVME_TRANSPORT_TCP,
2908 	.opts_init = spdk_nvmf_tcp_opts_init,
2909 	.create = spdk_nvmf_tcp_create,
2910 	.destroy = spdk_nvmf_tcp_destroy,
2911 
2912 	.listen = spdk_nvmf_tcp_listen,
2913 	.stop_listen = spdk_nvmf_tcp_stop_listen,
2914 	.accept = spdk_nvmf_tcp_accept,
2915 
2916 	.listener_discover = spdk_nvmf_tcp_discover,
2917 
2918 	.poll_group_create = spdk_nvmf_tcp_poll_group_create,
2919 	.get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group,
2920 	.poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy,
2921 	.poll_group_add = spdk_nvmf_tcp_poll_group_add,
2922 	.poll_group_remove = spdk_nvmf_tcp_poll_group_remove,
2923 	.poll_group_poll = spdk_nvmf_tcp_poll_group_poll,
2924 
2925 	.req_free = spdk_nvmf_tcp_req_free,
2926 	.req_complete = spdk_nvmf_tcp_req_complete,
2927 
2928 	.qpair_fini = spdk_nvmf_tcp_close_qpair,
2929 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
2930 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
2931 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
2932 	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
2933 };
2934 
2935 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)
2936