xref: /spdk/lib/iscsi/conn.c (revision bb64a7e5118f36b264b0de2a9ef77f22f8d0dbd7)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
5  *   Copyright (c) Intel Corporation.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/endian.h"
38 #include "spdk/env.h"
39 #include "spdk/event.h"
40 #include "spdk/likely.h"
41 #include "spdk/thread.h"
42 #include "spdk/queue.h"
43 #include "spdk/trace.h"
44 #include "spdk/net.h"
45 #include "spdk/sock.h"
46 #include "spdk/string.h"
47 
48 #include "spdk_internal/log.h"
49 
50 #include "iscsi/task.h"
51 #include "iscsi/conn.h"
52 #include "iscsi/tgt_node.h"
53 #include "iscsi/portal_grp.h"
54 
/*
 * Store the 32-bit value CRC32C into the four bytes starting at BUF, least
 * significant byte first.  Each argument is expanded once per stored byte,
 * so neither may have side effects.
 */
#define MAKE_DIGEST_WORD(BUF, CRC32C) \
	(   (((uint8_t *)(BUF))[0] = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
	    (((uint8_t *)(BUF))[1] = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
	    (((uint8_t *)(BUF))[2] = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
	    (((uint8_t *)(BUF))[3] = (uint8_t)((uint32_t)(CRC32C) >> 24)))
60 
/*
 * Zero *conn from its `portal` member through the end of the structure;
 * members laid out before `portal` are left untouched.
 *
 * Expands to a single expression with no trailing semicolon, so the caller
 * writes `SPDK_ISCSI_CONNECTION_MEMSET(conn);` and the macro can be used as
 * the body of an unbraced if/else.
 */
#define SPDK_ISCSI_CONNECTION_MEMSET(conn)		\
	memset(&(conn)->portal, 0, sizeof(*(conn)) -	\
		offsetof(struct spdk_iscsi_conn, portal))
64 
/*
 * Connection table: an mmap()ed POSIX shared-memory object of
 * MAX_ISCSI_CONNECTIONS entries, set up by spdk_initialize_iscsi_conns().
 * MAP_FAILED means "not mapped".
 */
struct spdk_iscsi_conn *g_conns_array = MAP_FAILED;
static int g_conns_array_fd = -1;
static char g_shm_name[64];

/* Held while g_conns_array slots are scanned, claimed or released. */
static pthread_mutex_t g_conns_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-core connection counters, indexed by lcore. */
static uint32_t *g_num_connections;
static int g_connections_per_lcore;

/* Poller registered by spdk_shutdown_iscsi_conns() to wait for all connections to go away. */
static struct spdk_poller *g_shutdown_timer = NULL;

/* Forward declarations for helpers defined later in this file. */
static uint32_t iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask);
static void iscsi_conn_full_feature_migrate(void *arg1, void *arg2);
static void iscsi_conn_stop(struct spdk_iscsi_conn *conn);
static void iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group,
			       struct spdk_sock *sock);
82 
83 static struct spdk_iscsi_conn *
84 allocate_conn(void)
85 {
86 	struct spdk_iscsi_conn	*conn;
87 	int				i;
88 
89 	pthread_mutex_lock(&g_conns_mutex);
90 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
91 		conn = &g_conns_array[i];
92 		if (!conn->is_valid) {
93 			SPDK_ISCSI_CONNECTION_MEMSET(conn);
94 			conn->is_valid = 1;
95 			pthread_mutex_unlock(&g_conns_mutex);
96 			return conn;
97 		}
98 	}
99 	pthread_mutex_unlock(&g_conns_mutex);
100 
101 	return NULL;
102 }
103 
104 static void
105 free_conn(struct spdk_iscsi_conn *conn)
106 {
107 	free(conn->portal_host);
108 	free(conn->portal_port);
109 	conn->is_valid = 0;
110 }
111 
112 static struct spdk_iscsi_conn *
113 find_iscsi_connection_by_id(int cid)
114 {
115 	if (g_conns_array != MAP_FAILED && g_conns_array[cid].is_valid == 1) {
116 		return &g_conns_array[cid];
117 	} else {
118 		return NULL;
119 	}
120 }
121 
122 int spdk_initialize_iscsi_conns(void)
123 {
124 	size_t conns_size = sizeof(struct spdk_iscsi_conn) * MAX_ISCSI_CONNECTIONS;
125 	uint32_t i, last_core;
126 
127 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init\n");
128 
129 	snprintf(g_shm_name, sizeof(g_shm_name), "/spdk_iscsi_conns.%d", spdk_app_get_shm_id());
130 	g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600);
131 	if (g_conns_array_fd < 0) {
132 		SPDK_ERRLOG("could not shm_open %s\n", g_shm_name);
133 		goto err;
134 	}
135 
136 	if (ftruncate(g_conns_array_fd, conns_size) != 0) {
137 		SPDK_ERRLOG("could not ftruncate\n");
138 		goto err;
139 	}
140 	g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED,
141 			     g_conns_array_fd, 0);
142 
143 	if (g_conns_array == MAP_FAILED) {
144 		fprintf(stderr, "could not mmap cons array file %s (%d)\n", g_shm_name, errno);
145 		goto err;
146 	}
147 
148 	memset(g_conns_array, 0, conns_size);
149 
150 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
151 		g_conns_array[i].id = i;
152 	}
153 
154 	last_core = spdk_env_get_last_core();
155 	g_num_connections = calloc(last_core + 1, sizeof(uint32_t));
156 	if (!g_num_connections) {
157 		SPDK_ERRLOG("Could not allocate array size=%u for g_num_connections\n",
158 			    last_core + 1);
159 		goto err;
160 	}
161 
162 	return 0;
163 
164 err:
165 	if (g_conns_array != MAP_FAILED) {
166 		munmap(g_conns_array, conns_size);
167 		g_conns_array = MAP_FAILED;
168 	}
169 
170 	if (g_conns_array_fd >= 0) {
171 		close(g_conns_array_fd);
172 		g_conns_array_fd = -1;
173 		shm_unlink(g_shm_name);
174 	}
175 
176 	return -1;
177 }
178 
179 static void
180 iscsi_poll_group_add_conn_sock(struct spdk_iscsi_conn *conn)
181 {
182 	struct spdk_iscsi_poll_group *poll_group;
183 	int rc;
184 
185 	assert(conn->lcore == spdk_env_get_current_core());
186 
187 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
188 
189 	rc = spdk_sock_group_add_sock(poll_group->sock_group, conn->sock, iscsi_conn_sock_cb, conn);
190 	if (rc < 0) {
191 		SPDK_ERRLOG("Failed to add sock=%p of conn=%p\n", conn->sock, conn);
192 	}
193 }
194 
195 static void
196 iscsi_poll_group_remove_conn_sock(struct spdk_iscsi_conn *conn)
197 {
198 	struct spdk_iscsi_poll_group *poll_group;
199 	int rc;
200 
201 	assert(conn->lcore == spdk_env_get_current_core());
202 
203 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
204 
205 	rc = spdk_sock_group_remove_sock(poll_group->sock_group, conn->sock);
206 	if (rc < 0) {
207 		SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn);
208 	}
209 }
210 
211 static void
212 iscsi_poll_group_add_conn(struct spdk_iscsi_conn *conn)
213 {
214 	struct spdk_iscsi_poll_group *poll_group;
215 
216 	assert(conn->lcore == spdk_env_get_current_core());
217 
218 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
219 
220 	conn->is_stopped = false;
221 	STAILQ_INSERT_TAIL(&poll_group->connections, conn, link);
222 	iscsi_poll_group_add_conn_sock(conn);
223 }
224 
225 static void
226 iscsi_poll_group_remove_conn(struct spdk_iscsi_conn *conn)
227 {
228 	struct spdk_iscsi_poll_group *poll_group;
229 
230 	assert(conn->lcore == spdk_env_get_current_core());
231 
232 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
233 
234 	conn->is_stopped = true;
235 	STAILQ_REMOVE(&poll_group->connections, conn, spdk_iscsi_conn, link);
236 }
237 
238 /**
239  * \brief Create an iSCSI connection from the given parameters and schedule it
240  *        on a reactor.
241  *
242  * \code
243  *
244  * # identify reactor where the new connections work item will be scheduled
245  * reactor = spdk_iscsi_conn_allocate_reactor()
246  * allocate spdk_iscsi_conn object
247  * initialize spdk_iscsi_conn object
248  * schedule iSCSI connection work item on reactor
249  *
250  * \endcode
251  */
252 int
253 spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal,
254 			  struct spdk_sock *sock)
255 {
256 	struct spdk_iscsi_conn *conn;
257 	int bufsize, i, rc;
258 
259 	conn = allocate_conn();
260 	if (conn == NULL) {
261 		SPDK_ERRLOG("Could not allocate connection.\n");
262 		return -1;
263 	}
264 
265 	pthread_mutex_lock(&g_spdk_iscsi.mutex);
266 	conn->timeout = g_spdk_iscsi.timeout;
267 	conn->nopininterval = g_spdk_iscsi.nopininterval;
268 	conn->nopininterval *= spdk_get_ticks_hz(); /* seconds to TSC */
269 	conn->nop_outstanding = false;
270 	conn->data_out_cnt = 0;
271 	conn->data_in_cnt = 0;
272 	pthread_mutex_unlock(&g_spdk_iscsi.mutex);
273 	conn->MaxRecvDataSegmentLength = 8192; /* RFC3720(12.12) */
274 
275 	conn->portal = portal;
276 	conn->pg_tag = portal->group->tag;
277 	conn->portal_host = strdup(portal->host);
278 	conn->portal_port = strdup(portal->port);
279 	conn->portal_cpumask = portal->cpumask;
280 	conn->sock = sock;
281 
282 	conn->state = ISCSI_CONN_STATE_INVALID;
283 	conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE;
284 	conn->ttt = 0;
285 
286 	conn->partial_text_parameter = NULL;
287 
288 	for (i = 0; i < MAX_CONNECTION_PARAMS; i++) {
289 		conn->conn_param_state_negotiated[i] = false;
290 	}
291 
292 	for (i = 0; i < MAX_SESSION_PARAMS; i++) {
293 		conn->sess_param_state_negotiated[i] = false;
294 	}
295 
296 	for (i = 0; i < DEFAULT_MAXR2T; i++) {
297 		conn->outstanding_r2t_tasks[i] = NULL;
298 	}
299 
300 	TAILQ_INIT(&conn->write_pdu_list);
301 	TAILQ_INIT(&conn->snack_pdu_list);
302 	TAILQ_INIT(&conn->queued_r2t_tasks);
303 	TAILQ_INIT(&conn->active_r2t_tasks);
304 	TAILQ_INIT(&conn->queued_datain_tasks);
305 	memset(&conn->open_lun_descs, 0, sizeof(conn->open_lun_descs));
306 
307 	rc = spdk_sock_getaddr(sock, conn->target_addr, sizeof conn->target_addr, NULL,
308 			       conn->initiator_addr, sizeof conn->initiator_addr, NULL);
309 	if (rc < 0) {
310 		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
311 		goto error_return;
312 	}
313 
314 	bufsize = 2 * 1024 * 1024;
315 	rc = spdk_sock_set_recvbuf(conn->sock, bufsize);
316 	if (rc != 0) {
317 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
318 	}
319 
320 	bufsize = 32 * 1024 * 1024 / g_spdk_iscsi.MaxConnections;
321 	if (bufsize > 2 * 1024 * 1024) {
322 		bufsize = 2 * 1024 * 1024;
323 	}
324 	rc = spdk_sock_set_sendbuf(conn->sock, bufsize);
325 	if (rc != 0) {
326 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
327 	}
328 
329 	/* set low water mark */
330 	rc = spdk_sock_set_recvlowat(conn->sock, 1);
331 	if (rc != 0) {
332 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
333 		goto error_return;
334 	}
335 
336 	/* set default params */
337 	rc = spdk_iscsi_conn_params_init(&conn->params);
338 	if (rc < 0) {
339 		SPDK_ERRLOG("iscsi_conn_params_init() failed\n");
340 		goto error_return;
341 	}
342 	conn->logout_timer = NULL;
343 	conn->shutdown_timer = NULL;
344 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Launching connection on acceptor thread\n");
345 	conn->pending_task_cnt = 0;
346 
347 	conn->lcore = spdk_env_get_current_core();
348 	__sync_fetch_and_add(&g_num_connections[conn->lcore], 1);
349 
350 	iscsi_poll_group_add_conn(conn);
351 	return 0;
352 
353 error_return:
354 	spdk_iscsi_param_free(conn->params);
355 	free_conn(conn);
356 	return -1;
357 }
358 
359 void
360 spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
361 {
362 	if (pdu->task) {
363 		if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) {
364 			if (pdu->task->scsi.offset > 0) {
365 				conn->data_in_cnt--;
366 				if (pdu->bhs.flags & ISCSI_DATAIN_STATUS) {
367 					/* Free the primary task after the last subtask done */
368 					conn->data_in_cnt--;
369 					spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
370 				}
371 			}
372 		} else if (pdu->bhs.opcode == ISCSI_OP_SCSI_RSP &&
373 			   pdu->task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
374 			if (pdu->task->scsi.offset > 0) {
375 				spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
376 			}
377 		}
378 		spdk_iscsi_task_put(pdu->task);
379 	}
380 	spdk_put_pdu(pdu);
381 }
382 
383 static int
384 iscsi_conn_free_tasks(struct spdk_iscsi_conn *conn)
385 {
386 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
387 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
388 
389 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
390 		TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
391 		spdk_iscsi_conn_free_pdu(conn, pdu);
392 	}
393 
394 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
395 		TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
396 		if (pdu->task) {
397 			spdk_iscsi_task_put(pdu->task);
398 		}
399 		spdk_put_pdu(pdu);
400 	}
401 
402 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
403 		if (!iscsi_task->is_queued) {
404 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
405 			spdk_iscsi_task_put(iscsi_task);
406 		}
407 	}
408 
409 	if (conn->pending_task_cnt) {
410 		return -1;
411 	}
412 
413 	return 0;
414 }
415 
416 static void
417 _iscsi_conn_free(struct spdk_iscsi_conn *conn)
418 {
419 	if (conn == NULL) {
420 		return;
421 	}
422 
423 	spdk_iscsi_param_free(conn->params);
424 
425 	/*
426 	 * Each connection pre-allocates its next PDU - make sure these get
427 	 *  freed here.
428 	 */
429 	spdk_put_pdu(conn->pdu_in_progress);
430 
431 	free_conn(conn);
432 }
433 
434 static void
435 iscsi_conn_cleanup_backend(struct spdk_iscsi_conn *conn)
436 {
437 	int rc;
438 	struct spdk_iscsi_tgt_node *target;
439 
440 	if (conn->sess->connections > 1) {
441 		/* connection specific cleanup */
442 	} else if (!g_spdk_iscsi.AllowDuplicateIsid) {
443 		/* clean up all tasks to all LUNs for session */
444 		target = conn->sess->target;
445 		if (target != NULL) {
446 			rc = spdk_iscsi_tgt_node_cleanup_luns(conn, target);
447 			if (rc < 0) {
448 				SPDK_ERRLOG("target abort failed\n");
449 			}
450 		}
451 	}
452 }
453 
454 static void
455 iscsi_conn_free(struct spdk_iscsi_conn *conn)
456 {
457 	struct spdk_iscsi_sess *sess;
458 	int idx;
459 	uint32_t i;
460 
461 	pthread_mutex_lock(&g_conns_mutex);
462 
463 	if (conn->sess == NULL) {
464 		goto end;
465 	}
466 
467 	idx = -1;
468 	sess = conn->sess;
469 	conn->sess = NULL;
470 
471 	for (i = 0; i < sess->connections; i++) {
472 		if (sess->conns[i] == conn) {
473 			idx = i;
474 			break;
475 		}
476 	}
477 
478 	if (idx < 0) {
479 		SPDK_ERRLOG("remove conn not found\n");
480 	} else {
481 		for (i = idx; i < sess->connections - 1; i++) {
482 			sess->conns[i] = sess->conns[i + 1];
483 		}
484 		sess->conns[sess->connections - 1] = NULL;
485 		sess->connections--;
486 
487 		if (sess->connections == 0) {
488 			/* cleanup last connection */
489 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI,
490 				      "cleanup last conn free sess\n");
491 			spdk_free_sess(sess);
492 		}
493 	}
494 
495 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Terminating connections(tsih %d): %d\n",
496 		      sess->tsih, sess->connections);
497 
498 end:
499 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "cleanup free conn\n");
500 	_iscsi_conn_free(conn);
501 
502 	pthread_mutex_unlock(&g_conns_mutex);
503 }
504 
505 static int
506 _iscsi_conn_check_shutdown(void *arg)
507 {
508 	struct spdk_iscsi_conn *conn = arg;
509 	int rc;
510 
511 	rc = iscsi_conn_free_tasks(conn);
512 	if (rc < 0) {
513 		return 1;
514 	}
515 
516 	spdk_poller_unregister(&conn->shutdown_timer);
517 
518 	iscsi_conn_stop(conn);
519 	iscsi_conn_free(conn);
520 
521 	return 1;
522 }
523 
524 static void
525 _iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
526 {
527 	int rc;
528 
529 	spdk_clear_all_transfer_task(conn, NULL, NULL);
530 	iscsi_poll_group_remove_conn_sock(conn);
531 	spdk_sock_close(&conn->sock);
532 	spdk_poller_unregister(&conn->logout_timer);
533 	spdk_poller_unregister(&conn->flush_poller);
534 
535 	rc = iscsi_conn_free_tasks(conn);
536 	if (rc < 0) {
537 		/* The connection cannot be freed yet. Check back later. */
538 		conn->shutdown_timer = spdk_poller_register(_iscsi_conn_check_shutdown, conn, 1000);
539 	} else {
540 		iscsi_conn_stop(conn);
541 		iscsi_conn_free(conn);
542 	}
543 }
544 
545 static int
546 _iscsi_conn_check_pending_tasks(void *arg)
547 {
548 	struct spdk_iscsi_conn *conn = arg;
549 
550 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
551 		return 1;
552 	}
553 
554 	spdk_poller_unregister(&conn->shutdown_timer);
555 
556 	_iscsi_conn_destruct(conn);
557 
558 	return 1;
559 }
560 
561 void
562 spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
563 {
564 	/* If a connection is already in exited status, just return */
565 	if (conn->state >= ISCSI_CONN_STATE_EXITED) {
566 		return;
567 	}
568 
569 	conn->state = ISCSI_CONN_STATE_EXITED;
570 
571 	if (conn->sess != NULL && conn->pending_task_cnt > 0) {
572 		iscsi_conn_cleanup_backend(conn);
573 	}
574 
575 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
576 		conn->shutdown_timer = spdk_poller_register(_iscsi_conn_check_pending_tasks, conn, 1000);
577 	} else {
578 		_iscsi_conn_destruct(conn);
579 	}
580 }
581 
582 static int
583 iscsi_get_active_conns(void)
584 {
585 	struct spdk_iscsi_conn *conn;
586 	int num = 0;
587 	int i;
588 
589 	pthread_mutex_lock(&g_conns_mutex);
590 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
591 		conn = find_iscsi_connection_by_id(i);
592 		if (conn == NULL) {
593 			continue;
594 		}
595 		num++;
596 	}
597 	pthread_mutex_unlock(&g_conns_mutex);
598 	return num;
599 }
600 
601 static void
602 iscsi_conns_cleanup(void)
603 {
604 	free(g_num_connections);
605 	munmap(g_conns_array, sizeof(struct spdk_iscsi_conn) *
606 	       MAX_ISCSI_CONNECTIONS);
607 	shm_unlink(g_shm_name);
608 	if (g_conns_array_fd >= 0) {
609 		close(g_conns_array_fd);
610 		g_conns_array_fd = -1;
611 	}
612 }
613 
/*
 * Event callback run after the last connection is gone: release the global
 * connection state and report that connection shutdown has completed.
 * Neither event argument is used.
 */
static void
iscsi_conn_check_shutdown_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	iscsi_conns_cleanup();
	spdk_shutdown_iscsi_conns_done();
}
620 
621 static int
622 iscsi_conn_check_shutdown(void *arg)
623 {
624 	struct spdk_event *event;
625 
626 	if (iscsi_get_active_conns() != 0) {
627 		return 1;
628 	}
629 
630 	spdk_poller_unregister(&g_shutdown_timer);
631 	event = spdk_event_allocate(spdk_env_get_current_core(),
632 				    iscsi_conn_check_shutdown_cb, NULL, NULL);
633 	spdk_event_call(event);
634 
635 	return 1;
636 }
637 
638 static void
639 iscsi_conn_close_lun(struct spdk_iscsi_conn *conn, int lun_id)
640 {
641 	struct spdk_scsi_desc *desc;
642 
643 	desc = conn->open_lun_descs[lun_id];
644 	if (desc != NULL) {
645 		spdk_scsi_lun_free_io_channel(desc);
646 		spdk_scsi_lun_close(desc);
647 		conn->open_lun_descs[lun_id] = NULL;
648 	}
649 }
650 
651 static void
652 iscsi_conn_close_luns(struct spdk_iscsi_conn *conn)
653 {
654 	int i;
655 
656 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
657 		iscsi_conn_close_lun(conn, i);
658 	}
659 }
660 
661 static void
662 _iscsi_conn_remove_lun(void *arg1, void *arg2)
663 {
664 	struct spdk_iscsi_conn *conn = arg1;
665 	struct spdk_scsi_lun *lun = arg2;
666 	int lun_id = spdk_scsi_lun_get_id(lun);
667 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
668 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
669 
670 	/* If a connection is already in stating status, just return */
671 	if (conn->state >= ISCSI_CONN_STATE_EXITING) {
672 		return;
673 	}
674 
675 	spdk_clear_all_transfer_task(conn, lun, NULL);
676 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
677 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
678 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
679 			spdk_iscsi_conn_free_pdu(conn, pdu);
680 		}
681 	}
682 
683 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
684 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
685 			TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
686 			spdk_iscsi_task_put(pdu->task);
687 			spdk_put_pdu(pdu);
688 		}
689 	}
690 
691 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
692 		if ((!iscsi_task->is_queued) && (lun == iscsi_task->scsi.lun)) {
693 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
694 			spdk_iscsi_task_put(iscsi_task);
695 		}
696 	}
697 
698 	iscsi_conn_close_lun(conn, lun_id);
699 }
700 
701 static void
702 iscsi_conn_remove_lun(struct spdk_scsi_lun *lun, void *remove_ctx)
703 {
704 	struct spdk_iscsi_conn *conn = remove_ctx;
705 	struct spdk_event *event;
706 
707 	event = spdk_event_allocate(conn->lcore, _iscsi_conn_remove_lun,
708 				    conn, lun);
709 	spdk_event_call(event);
710 }
711 
712 static void
713 iscsi_conn_open_luns(struct spdk_iscsi_conn *conn)
714 {
715 	int i, rc;
716 	struct spdk_scsi_lun *lun;
717 	struct spdk_scsi_desc *desc;
718 
719 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
720 		lun = spdk_scsi_dev_get_lun(conn->dev, i);
721 		if (lun == NULL) {
722 			continue;
723 		}
724 
725 		rc = spdk_scsi_lun_open(lun, iscsi_conn_remove_lun, conn, &desc);
726 		if (rc != 0) {
727 			goto error;
728 		}
729 
730 		rc = spdk_scsi_lun_allocate_io_channel(desc);
731 		if (rc != 0) {
732 			spdk_scsi_lun_close(desc);
733 			goto error;
734 		}
735 
736 		conn->open_lun_descs[i] = desc;
737 	}
738 
739 	return;
740 
741 error:
742 	iscsi_conn_close_luns(conn);
743 }
744 
745 /**
746  *  This function will stop executing the specified connection.
747  */
748 static void
749 iscsi_conn_stop(struct spdk_iscsi_conn *conn)
750 {
751 	struct spdk_iscsi_tgt_node *target;
752 
753 	if (conn->state == ISCSI_CONN_STATE_EXITED && conn->sess != NULL &&
754 	    conn->sess->session_type == SESSION_TYPE_NORMAL &&
755 	    conn->full_feature) {
756 		target = conn->sess->target;
757 		pthread_mutex_lock(&target->mutex);
758 		target->num_active_conns--;
759 		pthread_mutex_unlock(&target->mutex);
760 
761 		iscsi_conn_close_luns(conn);
762 	}
763 
764 	assert(conn->lcore == spdk_env_get_current_core());
765 
766 	__sync_fetch_and_sub(&g_num_connections[conn->lcore], 1);
767 	iscsi_poll_group_remove_conn(conn);
768 }
769 
770 void spdk_shutdown_iscsi_conns(void)
771 {
772 	struct spdk_iscsi_conn	*conn;
773 	int			i;
774 
775 	pthread_mutex_lock(&g_conns_mutex);
776 
777 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
778 		conn = find_iscsi_connection_by_id(i);
779 		if (conn == NULL) {
780 			continue;
781 		}
782 
783 		/* Do not set conn->state if the connection has already started exiting.
784 		  * This ensures we do not move a connection from EXITED state back to EXITING.
785 		  */
786 		if (conn->state < ISCSI_CONN_STATE_EXITING) {
787 			conn->state = ISCSI_CONN_STATE_EXITING;
788 		}
789 	}
790 
791 	pthread_mutex_unlock(&g_conns_mutex);
792 	g_shutdown_timer = spdk_poller_register(iscsi_conn_check_shutdown, NULL, 1000);
793 }
794 
795 int
796 spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, const char *conn_match,
797 		      int drop_all)
798 {
799 	struct spdk_iscsi_conn	*xconn;
800 	const char			*xconn_match;
801 	int				i, num;
802 
803 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_drop_conns\n");
804 
805 	num = 0;
806 	pthread_mutex_lock(&g_conns_mutex);
807 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
808 		xconn = find_iscsi_connection_by_id(i);
809 
810 		if (xconn == NULL) {
811 			continue;
812 		}
813 
814 		if (xconn == conn) {
815 			continue;
816 		}
817 
818 		if (!drop_all && xconn->initiator_port == NULL) {
819 			continue;
820 		}
821 
822 		xconn_match =
823 			drop_all ? xconn->initiator_name : spdk_scsi_port_get_name(xconn->initiator_port);
824 
825 		if (!strcasecmp(conn_match, xconn_match) &&
826 		    conn->target == xconn->target) {
827 
828 			if (num == 0) {
829 				/*
830 				 * Only print this message before we report the
831 				 *  first dropped connection.
832 				 */
833 				SPDK_ERRLOG("drop old connections %s by %s\n",
834 					    conn->target->name, conn_match);
835 			}
836 
837 			SPDK_ERRLOG("exiting conn by %s (%s)\n",
838 				    xconn_match, xconn->initiator_addr);
839 			if (xconn->sess != NULL) {
840 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=%u\n", xconn->sess->tsih);
841 			} else {
842 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=xx\n");
843 			}
844 
845 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CID=%u\n", xconn->cid);
846 
847 			/* Do not set xconn->state if the connection has already started exiting.
848 			  * This ensures we do not move a connection from EXITED state back to EXITING.
849 			  */
850 			if (xconn->state < ISCSI_CONN_STATE_EXITING) {
851 				xconn->state = ISCSI_CONN_STATE_EXITING;
852 			}
853 			num++;
854 		}
855 	}
856 
857 	pthread_mutex_unlock(&g_conns_mutex);
858 
859 	if (num != 0) {
860 		SPDK_ERRLOG("exiting %d conns\n", num);
861 	}
862 
863 	return 0;
864 }
865 
866 /**
867  * \brief Reads data for the specified iSCSI connection from its TCP socket.
868  *
869  * The TCP socket is marked as non-blocking, so this function may not read
870  * all data requested.
871  *
872  * Returns SPDK_ISCSI_CONNECTION_FATAL if the recv() operation indicates a fatal
873  * error with the TCP connection (including if the TCP connection was closed
874  * unexpectedly.
875  *
876  * Otherwise returns the number of bytes successfully read.
877  */
878 int
879 spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int bytes,
880 			  void *buf)
881 {
882 	int ret;
883 
884 	if (bytes == 0) {
885 		return 0;
886 	}
887 
888 	ret = spdk_sock_recv(conn->sock, buf, bytes);
889 
890 	if (ret > 0) {
891 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
892 		return ret;
893 	}
894 
895 	if (ret < 0) {
896 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
897 			return 0;
898 		}
899 
900 		/* For connect reset issue, do not output error log */
901 		if (errno == ECONNRESET) {
902 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_recv() failed, errno %d: %s\n",
903 				      errno, spdk_strerror(errno));
904 		} else {
905 			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
906 				    errno, spdk_strerror(errno));
907 		}
908 	}
909 
910 	/* connection closed */
911 	return SPDK_ISCSI_CONNECTION_FATAL;
912 }
913 
914 int
915 spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn,
916 			   struct iovec *iov, int iovcnt)
917 {
918 	int ret;
919 
920 	if (iov == NULL || iovcnt == 0) {
921 		return 0;
922 	}
923 
924 	if (iovcnt == 1) {
925 		return spdk_iscsi_conn_read_data(conn, iov[0].iov_len,
926 						 iov[0].iov_base);
927 	}
928 
929 	ret = spdk_sock_readv(conn->sock, iov, iovcnt);
930 
931 	if (ret > 0) {
932 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
933 		return ret;
934 	}
935 
936 	if (ret < 0) {
937 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
938 			return 0;
939 		}
940 
941 		/* For connect reset issue, do not output error log */
942 		if (errno == ECONNRESET) {
943 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_readv() failed, errno %d: %s\n",
944 				      errno, spdk_strerror(errno));
945 		} else {
946 			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
947 				    errno, spdk_strerror(errno));
948 		}
949 	}
950 
951 	/* connection closed */
952 	return SPDK_ISCSI_CONNECTION_FATAL;
953 }
954 
955 void
956 spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task)
957 {
958 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
959 
960 	spdk_iscsi_task_mgmt_response(task->conn, task);
961 	spdk_iscsi_task_put(task);
962 }
963 
964 static void
965 iscsi_task_copy_to_rsp_scsi_status(struct spdk_iscsi_task *primary,
966 				   struct spdk_scsi_task *task)
967 {
968 	memcpy(primary->rsp_sense_data, task->sense_data, task->sense_data_len);
969 	primary->rsp_sense_data_len = task->sense_data_len;
970 	primary->rsp_scsi_status = task->status;
971 }
972 
973 static void
974 iscsi_task_copy_from_rsp_scsi_status(struct spdk_scsi_task *task,
975 				     struct spdk_iscsi_task *primary)
976 {
977 	memcpy(task->sense_data, primary->rsp_sense_data,
978 	       primary->rsp_sense_data_len);
979 	task->sense_data_len = primary->rsp_sense_data_len;
980 	task->status = primary->rsp_scsi_status;
981 }
982 
983 static void
984 process_completed_read_subtask_list(struct spdk_iscsi_conn *conn,
985 				    struct spdk_iscsi_task *primary)
986 {
987 	struct spdk_iscsi_task *subtask, *tmp;
988 
989 	TAILQ_FOREACH_SAFE(subtask, &primary->subtask_list, subtask_link, tmp) {
990 		if (subtask->scsi.offset == primary->bytes_completed) {
991 			TAILQ_REMOVE(&primary->subtask_list, subtask, subtask_link);
992 			primary->bytes_completed += subtask->scsi.length;
993 			spdk_iscsi_task_response(conn, subtask);
994 			spdk_iscsi_task_put(subtask);
995 		} else {
996 			break;
997 		}
998 	}
999 }
1000 
1001 static void
1002 process_read_task_completion(struct spdk_iscsi_conn *conn,
1003 			     struct spdk_iscsi_task *task,
1004 			     struct spdk_iscsi_task *primary)
1005 {
1006 	struct spdk_iscsi_task *tmp;
1007 
1008 	/* If the status of the completed subtask is the first failure,
1009 	 * copy it to out-of-order subtasks and remember it as the status
1010 	 * of the command,
1011 	 *
1012 	 * Even if the status of the completed task is success,
1013 	 * there are any failed subtask ever, copy the first failed status
1014 	 * to it.
1015 	 */
1016 	if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
1017 		if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1018 			TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
1019 				spdk_scsi_task_copy_status(&tmp->scsi, &task->scsi);
1020 			}
1021 			iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1022 		}
1023 	} else if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1024 		iscsi_task_copy_from_rsp_scsi_status(&task->scsi, primary);
1025 	}
1026 
1027 	if ((task != primary) &&
1028 	    (task->scsi.offset != primary->bytes_completed)) {
1029 		TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
1030 			if (task->scsi.offset < tmp->scsi.offset) {
1031 				TAILQ_INSERT_BEFORE(tmp, task, subtask_link);
1032 				return;
1033 			}
1034 		}
1035 
1036 		TAILQ_INSERT_TAIL(&primary->subtask_list, task, subtask_link);
1037 		return;
1038 	}
1039 
1040 	primary->bytes_completed += task->scsi.length;
1041 	spdk_iscsi_task_response(conn, task);
1042 
1043 	if ((task != primary) ||
1044 	    (task->scsi.transfer_len == task->scsi.length)) {
1045 		spdk_iscsi_task_put(task);
1046 	}
1047 	process_completed_read_subtask_list(conn, primary);
1048 
1049 	spdk_iscsi_conn_handle_queued_datain_tasks(conn);
1050 }
1051 
1052 void
1053 spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task)
1054 {
1055 	struct spdk_iscsi_task *primary;
1056 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
1057 	struct spdk_iscsi_conn *conn = task->conn;
1058 	struct spdk_iscsi_pdu *pdu = task->pdu;
1059 
1060 	spdk_trace_record(TRACE_ISCSI_TASK_DONE, conn->id, 0, (uintptr_t)task, 0);
1061 
1062 	task->is_queued = false;
1063 	primary = spdk_iscsi_task_get_primary(task);
1064 
1065 	if (spdk_iscsi_task_is_read(primary)) {
1066 		process_read_task_completion(conn, task, primary);
1067 	} else {
1068 		primary->bytes_completed += task->scsi.length;
1069 
1070 		/* If the status of the subtask is the first failure, remember it as
1071 		 * the status of the command and set it to the status of the primary
1072 		 * task later.
1073 		 *
1074 		 * If the first failed task is the primary, two copies can be avoided
1075 		 * but code simplicity is prioritized.
1076 		 */
1077 		if (task->scsi.status == SPDK_SCSI_STATUS_GOOD) {
1078 			if (task != primary) {
1079 				primary->scsi.data_transferred += task->scsi.data_transferred;
1080 			}
1081 		} else if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1082 			iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1083 		}
1084 
1085 		if (primary->bytes_completed == primary->scsi.transfer_len) {
1086 			spdk_del_transfer_task(conn, primary->tag);
1087 			if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1088 				iscsi_task_copy_from_rsp_scsi_status(&primary->scsi, primary);
1089 			}
1090 			spdk_iscsi_task_response(conn, primary);
1091 			/*
1092 			 * Check if this is the last task completed for an iSCSI write
1093 			 *  that required child subtasks.  If task != primary, we know
1094 			 *  for sure that it was part of an iSCSI write with child subtasks.
1095 			 *  The trickier case is when the last task completed was the initial
1096 			 *  task - in this case the task will have a smaller length than
1097 			 *  the overall transfer length.
1098 			 */
1099 			if (task != primary || task->scsi.length != task->scsi.transfer_len) {
1100 				TAILQ_REMOVE(&conn->active_r2t_tasks, primary, link);
1101 				spdk_iscsi_task_put(primary);
1102 			}
1103 		}
1104 		spdk_iscsi_task_put(task);
1105 	}
1106 	if (!task->parent) {
1107 		spdk_trace_record(TRACE_ISCSI_PDU_COMPLETED, 0, 0, (uintptr_t)pdu, 0);
1108 	}
1109 }
1110 
1111 static int
1112 iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest,
1113 		     int data_digest)
1114 {
1115 	int data_len, enable_digest, total;
1116 
1117 	enable_digest = 1;
1118 	if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) {
1119 		enable_digest = 0;
1120 	}
1121 
1122 	total = ISCSI_BHS_LEN;
1123 
1124 	total += (4 * pdu->bhs.total_ahs_len);
1125 
1126 	if (enable_digest && header_digest) {
1127 		total += ISCSI_DIGEST_LEN;
1128 	}
1129 
1130 	data_len = DGET24(pdu->bhs.data_segment_len);
1131 	if (data_len > 0) {
1132 		total += ISCSI_ALIGN(data_len);
1133 		if (enable_digest && data_digest) {
1134 			total += ISCSI_DIGEST_LEN;
1135 		}
1136 	}
1137 
1138 	return total;
1139 }
1140 
1141 void
1142 spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn)
1143 {
1144 	uint64_t	tsc;
1145 
1146 	/**
1147 	  * This function will be executed by nop_poller of iSCSI polling group, so
1148 	  * we need to check the connection state first, then do the nop interval
1149 	  * expiration check work.
1150 	  */
1151 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1152 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1153 		return;
1154 	}
1155 
1156 	/* Check for nop interval expiration */
1157 	tsc = spdk_get_ticks();
1158 	if (conn->nop_outstanding) {
1159 		if ((tsc - conn->last_nopin) > (conn->timeout  * spdk_get_ticks_hz())) {
1160 			SPDK_ERRLOG("Timed out waiting for NOP-Out response from initiator\n");
1161 			SPDK_ERRLOG("  tsc=0x%lx, last_nopin=0x%lx\n", tsc, conn->last_nopin);
1162 			SPDK_ERRLOG("  initiator=%s, target=%s\n", conn->initiator_name,
1163 				    conn->target_short_name);
1164 			conn->state = ISCSI_CONN_STATE_EXITING;
1165 		}
1166 	} else if (tsc - conn->last_nopin > conn->nopininterval) {
1167 		spdk_iscsi_send_nopin(conn);
1168 	}
1169 }
1170 
1171 /**
1172  * \brief Makes one attempt to flush response PDUs back to the initiator.
1173  *
1174  * Builds a list of iovecs for response PDUs that must be sent back to the
1175  * initiator and passes it to writev().
1176  *
1177  * Since the socket is non-blocking, writev() may not be able to flush all
1178  * of the iovecs, and may even partially flush one of the iovecs.  In this
1179  * case, the partially flushed PDU will remain on the write_pdu_list with
1180  * an offset pointing to the next byte to be flushed.
1181  *
1182  * Returns 0 if all PDUs were flushed.
1183  *
1184  * Returns 1 if some PDUs could not be flushed due to lack of send buffer
1185  * space.
1186  *
1187  * Returns -1 if an exception error occurred indicating the TCP connection
1188  * should be closed.
1189  */
1190 static int
1191 iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn)
1192 {
1193 	const int num_iovs = 32;
1194 	struct iovec iovs[num_iovs];
1195 	struct iovec *iov = iovs;
1196 	int iovcnt = 0;
1197 	int bytes = 0;
1198 	uint32_t total_length = 0;
1199 	uint32_t mapped_length = 0;
1200 	struct spdk_iscsi_pdu *pdu;
1201 	int pdu_length;
1202 
1203 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1204 
1205 	if (pdu == NULL) {
1206 		return 0;
1207 	}
1208 
1209 	/*
1210 	 * Build up a list of iovecs for the first few PDUs in the
1211 	 *  connection's write_pdu_list. For the first PDU, check if it was
1212 	 *  partially written out the last time this function was called, and
1213 	 *  if so adjust the iovec array accordingly. This check is done in
1214 	 *  spdk_iscsi_build_iovs() and so applied to remaining PDUs too.
1215 	 *  But extra overhead is negligible.
1216 	 */
1217 	while (pdu != NULL && ((num_iovs - iovcnt) > 0)) {
1218 		iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt,
1219 						pdu, &mapped_length);
1220 		total_length += mapped_length;
1221 		pdu = TAILQ_NEXT(pdu, tailq);
1222 	}
1223 
1224 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt);
1225 
1226 	bytes = spdk_sock_writev(conn->sock, iov, iovcnt);
1227 	if (bytes == -1) {
1228 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
1229 			return 1;
1230 		} else {
1231 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
1232 				    errno, spdk_strerror(errno));
1233 			return -1;
1234 		}
1235 	}
1236 
1237 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0);
1238 
1239 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1240 
1241 	/*
1242 	 * Free any PDUs that were fully written.  If a PDU was only
1243 	 *  partially written, update its writev_offset so that next
1244 	 *  time only the unwritten portion will be sent to writev().
1245 	 */
1246 	while (bytes > 0) {
1247 		pdu_length = iscsi_get_pdu_length(pdu, conn->header_digest,
1248 						  conn->data_digest);
1249 		pdu_length -= pdu->writev_offset;
1250 
1251 		if (bytes >= pdu_length) {
1252 			bytes -= pdu_length;
1253 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
1254 
1255 			if ((conn->full_feature) &&
1256 			    (conn->sess->ErrorRecoveryLevel >= 1) &&
1257 			    spdk_iscsi_is_deferred_free_pdu(pdu)) {
1258 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n",
1259 					      from_be32(&pdu->bhs.stat_sn));
1260 				TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu,
1261 						  tailq);
1262 			} else {
1263 				spdk_iscsi_conn_free_pdu(conn, pdu);
1264 			}
1265 
1266 			pdu = TAILQ_FIRST(&conn->write_pdu_list);
1267 		} else {
1268 			pdu->writev_offset += bytes;
1269 			bytes = 0;
1270 		}
1271 	}
1272 
1273 	return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1;
1274 }
1275 
1276 /**
1277  * \brief Flushes response PDUs back to the initiator.
1278  *
1279  * This function may return without all PDUs having flushed to the
1280  * underlying TCP socket buffer - for example, in the case where the
1281  * socket buffer is already full.
1282  *
1283  * During normal RUNNING connection state, if not all PDUs are flushed,
1284  * then subsequent calls to this routine will eventually flush
1285  * remaining PDUs.
1286  *
1287  * During other connection states (EXITING or LOGGED_OUT), this
1288  * function will spin until all PDUs have successfully been flushed.
1289  */
1290 static int
1291 iscsi_conn_flush_pdus(void *_conn)
1292 {
1293 	struct spdk_iscsi_conn *conn = _conn;
1294 	int rc;
1295 
1296 	if (conn->state == ISCSI_CONN_STATE_RUNNING) {
1297 		rc = iscsi_conn_flush_pdus_internal(conn);
1298 		if (rc == 0 && conn->flush_poller != NULL) {
1299 			spdk_poller_unregister(&conn->flush_poller);
1300 		} else if (rc == 1 && conn->flush_poller == NULL) {
1301 			conn->flush_poller = spdk_poller_register(iscsi_conn_flush_pdus,
1302 					     conn, 50);
1303 		}
1304 	} else {
1305 		/*
1306 		 * If the connection state is not RUNNING, then
1307 		 * keep trying to flush PDUs until our list is
1308 		 * empty - to make sure all data is sent before
1309 		 * closing the connection.
1310 		 */
1311 		do {
1312 			rc = iscsi_conn_flush_pdus_internal(conn);
1313 		} while (rc == 1);
1314 	}
1315 
1316 	if (rc < 0 && conn->state < ISCSI_CONN_STATE_EXITING) {
1317 		/*
1318 		 * If the poller has already started destruction of the connection,
1319 		 *  i.e. the socket read failed, then the connection state may already
1320 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
1321 		 */
1322 		conn->state = ISCSI_CONN_STATE_EXITING;
1323 	}
1324 
1325 	return 1;
1326 }
1327 
1328 static int
1329 iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx)
1330 {
1331 	struct iovec iov;
1332 	struct spdk_dif_error err_blk = {};
1333 	uint32_t num_blocks;
1334 	int rc;
1335 
1336 	iov.iov_base = pdu->data;
1337 	iov.iov_len = pdu->data_buf_len;
1338 	num_blocks = pdu->data_buf_len / dif_ctx->block_size;
1339 
1340 	rc = spdk_dif_verify(&iov, 1, num_blocks, dif_ctx, &err_blk);
1341 	if (rc != 0) {
1342 		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
1343 			    err_blk.err_type, err_blk.err_offset);
1344 	}
1345 
1346 	return rc;
1347 }
1348 
1349 void
1350 spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
1351 {
1352 	uint32_t crc32c;
1353 	int rc;
1354 
1355 	if (spdk_unlikely(spdk_iscsi_get_dif_ctx(conn, pdu, &pdu->dif_ctx))) {
1356 		rc = iscsi_dif_verify(pdu, &pdu->dif_ctx);
1357 		if (rc != 0) {
1358 			spdk_iscsi_conn_free_pdu(conn, pdu);
1359 			conn->state = ISCSI_CONN_STATE_EXITING;
1360 			return;
1361 		}
1362 		pdu->dif_insert_or_strip = true;
1363 	}
1364 
1365 	if (pdu->bhs.opcode != ISCSI_OP_LOGIN_RSP) {
1366 		/* Header Digest */
1367 		if (conn->header_digest) {
1368 			crc32c = spdk_iscsi_pdu_calc_header_digest(pdu);
1369 			MAKE_DIGEST_WORD(pdu->header_digest, crc32c);
1370 		}
1371 
1372 		/* Data Digest */
1373 		if (conn->data_digest && DGET24(pdu->bhs.data_segment_len) != 0) {
1374 			crc32c = spdk_iscsi_pdu_calc_data_digest(pdu);
1375 			MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
1376 		}
1377 	}
1378 
1379 	TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq);
1380 	iscsi_conn_flush_pdus(conn);
1381 }
1382 
1383 #define GET_PDU_LOOP_COUNT	16
1384 
1385 static int
1386 iscsi_conn_handle_incoming_pdus(struct spdk_iscsi_conn *conn)
1387 {
1388 	struct spdk_iscsi_pdu *pdu;
1389 	int i, rc;
1390 
1391 	/* Read new PDUs from network */
1392 	for (i = 0; i < GET_PDU_LOOP_COUNT; i++) {
1393 		rc = spdk_iscsi_read_pdu(conn, &pdu);
1394 		if (rc == 0) {
1395 			break;
1396 		} else if (rc == SPDK_ISCSI_CONNECTION_FATAL) {
1397 			return rc;
1398 		}
1399 
1400 		if (conn->state == ISCSI_CONN_STATE_LOGGED_OUT) {
1401 			SPDK_ERRLOG("pdu received after logout\n");
1402 			spdk_put_pdu(pdu);
1403 			return SPDK_ISCSI_CONNECTION_FATAL;
1404 		}
1405 
1406 		rc = spdk_iscsi_execute(conn, pdu);
1407 		spdk_put_pdu(pdu);
1408 		if (rc != 0) {
1409 			SPDK_ERRLOG("spdk_iscsi_execute() fatal error on %s(%s)\n",
1410 				    conn->target_port != NULL ? spdk_scsi_port_get_name(conn->target_port) : "NULL",
1411 				    conn->initiator_port != NULL ? spdk_scsi_port_get_name(conn->initiator_port) : "NULL");
1412 			return rc;
1413 		}
1414 
1415 		spdk_trace_record(TRACE_ISCSI_TASK_EXECUTED, 0, 0, (uintptr_t)pdu, 0);
1416 		if (conn->is_stopped) {
1417 			break;
1418 		}
1419 	}
1420 
1421 	return i;
1422 }
1423 
1424 static void
1425 iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
1426 {
1427 	struct spdk_iscsi_conn *conn = arg;
1428 	int rc;
1429 
1430 	assert(conn != NULL);
1431 
1432 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1433 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1434 		return;
1435 	}
1436 
1437 	/* Handle incoming PDUs */
1438 	rc = iscsi_conn_handle_incoming_pdus(conn);
1439 	if (rc < 0) {
1440 		conn->state = ISCSI_CONN_STATE_EXITING;
1441 		iscsi_conn_flush_pdus(conn);
1442 	}
1443 }
1444 
1445 static void
1446 iscsi_conn_full_feature_migrate(void *arg1, void *arg2)
1447 {
1448 	struct spdk_iscsi_conn *conn = arg1;
1449 
1450 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1451 		iscsi_conn_open_luns(conn);
1452 	}
1453 
1454 	/* The poller has been unregistered, so now we can re-register it on the new core. */
1455 	conn->lcore = spdk_env_get_current_core();
1456 	iscsi_poll_group_add_conn(conn);
1457 }
1458 
1459 void
1460 spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn)
1461 {
1462 	int				lcore;
1463 	struct spdk_event		*event;
1464 	struct spdk_iscsi_tgt_node *target;
1465 
1466 	lcore = iscsi_conn_allocate_reactor(conn->portal->cpumask);
1467 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1468 		target = conn->sess->target;
1469 		pthread_mutex_lock(&target->mutex);
1470 		target->num_active_conns++;
1471 		if (target->num_active_conns == 1) {
1472 			/**
1473 			 * This is the only active connection for this target node.
1474 			 *  Save the lcore in the target node so it can be used for
1475 			 *  any other connections to this target node.
1476 			 */
1477 			target->lcore = lcore;
1478 		} else {
1479 			/**
1480 			 * There are other active connections for this target node.
1481 			 *  Ignore the lcore specified by the allocator and use the
1482 			 *  the target node's lcore to ensure this connection runs on
1483 			 *  the same lcore as other connections for this target node.
1484 			 */
1485 			lcore = target->lcore;
1486 		}
1487 		pthread_mutex_unlock(&target->mutex);
1488 	}
1489 
1490 	iscsi_poll_group_remove_conn_sock(conn);
1491 	spdk_poller_unregister(&conn->flush_poller);
1492 	iscsi_conn_stop(conn);
1493 
1494 	__sync_fetch_and_add(&g_num_connections[lcore], 1);
1495 	conn->last_nopin = spdk_get_ticks();
1496 	event = spdk_event_allocate(lcore, iscsi_conn_full_feature_migrate,
1497 				    conn, NULL);
1498 	spdk_event_call(event);
1499 }
1500 
1501 void
1502 spdk_iscsi_conn_set_min_per_core(int count)
1503 {
1504 	g_connections_per_lcore = count;
1505 }
1506 
1507 int
1508 spdk_iscsi_conn_get_min_per_core(void)
1509 {
1510 	return g_connections_per_lcore;
1511 }
1512 
1513 static uint32_t
1514 iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask)
1515 {
1516 	uint32_t i, selected_core;
1517 	int32_t num_pollers, min_pollers;
1518 
1519 	min_pollers = INT_MAX;
1520 	selected_core = spdk_env_get_first_core();
1521 
1522 	SPDK_ENV_FOREACH_CORE(i) {
1523 		if (!spdk_cpuset_get_cpu(cpumask, i)) {
1524 			continue;
1525 		}
1526 
1527 		/* This core is running. Check how many pollers it already has. */
1528 		num_pollers = g_num_connections[i];
1529 
1530 		if ((num_pollers > 0) && (num_pollers < g_connections_per_lcore)) {
1531 			/* Fewer than the maximum connections per core,
1532 			 * but at least 1. Use this core.
1533 			 */
1534 			return i;
1535 		} else if (num_pollers < min_pollers) {
1536 			/* Track the core that has the minimum number of pollers
1537 			 * to be used if no cores meet our criteria
1538 			 */
1539 			selected_core = i;
1540 			min_pollers = num_pollers;
1541 		}
1542 	}
1543 
1544 	return selected_core;
1545 }
1546 
/*
 * Poller callback registered by spdk_iscsi_conn_logout(): destructs the
 * connection passed in arg and returns -1.
 */
static int
logout_timeout(void *arg)
{
	struct spdk_iscsi_conn *logged_out_conn = arg;

	spdk_iscsi_conn_destruct(logged_out_conn);
	return -1;
}
1556 
1557 void
1558 spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn)
1559 {
1560 	conn->state = ISCSI_CONN_STATE_LOGGED_OUT;
1561 	conn->logout_timer = spdk_poller_register(logout_timeout, conn, ISCSI_LOGOUT_TIMEOUT * 1000000);
1562 }
1563 
1564 SPDK_TRACE_REGISTER_FN(iscsi_conn_trace, "iscsi_conn", TRACE_GROUP_ISCSI)
1565 {
1566 	spdk_trace_register_owner(OWNER_ISCSI_CONN, 'c');
1567 	spdk_trace_register_object(OBJECT_ISCSI_PDU, 'p');
1568 	spdk_trace_register_description("ISCSI_READ_FROM_SOCKET_DONE", "",
1569 					TRACE_ISCSI_READ_FROM_SOCKET_DONE,
1570 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1571 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_START", "", TRACE_ISCSI_FLUSH_WRITEBUF_START,
1572 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "iovec: ");
1573 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_DONE", "", TRACE_ISCSI_FLUSH_WRITEBUF_DONE,
1574 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1575 	spdk_trace_register_description("ISCSI_READ_PDU", "", TRACE_ISCSI_READ_PDU,
1576 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 1, 0, "opc:   ");
1577 	spdk_trace_register_description("ISCSI_TASK_DONE", "", TRACE_ISCSI_TASK_DONE,
1578 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 0, 0, "");
1579 	spdk_trace_register_description("ISCSI_TASK_QUEUE", "", TRACE_ISCSI_TASK_QUEUE,
1580 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 1, 1, "pdu:   ");
1581 	spdk_trace_register_description("ISCSI_TASK_EXECUTED", "", TRACE_ISCSI_TASK_EXECUTED,
1582 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1583 	spdk_trace_register_description("ISCSI_PDU_COMPLETED", "", TRACE_ISCSI_PDU_COMPLETED,
1584 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1585 }
1586