xref: /spdk/lib/iscsi/conn.c (revision 3aa204fb3138c43e63b868e488277f13b098cef1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
5  *   Copyright (c) Intel Corporation.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/endian.h"
38 #include "spdk/env.h"
39 #include "spdk/event.h"
40 #include "spdk/likely.h"
41 #include "spdk/thread.h"
42 #include "spdk/queue.h"
43 #include "spdk/trace.h"
44 #include "spdk/net.h"
45 #include "spdk/sock.h"
46 #include "spdk/string.h"
47 
48 #include "spdk_internal/log.h"
49 
50 #include "iscsi/task.h"
51 #include "iscsi/conn.h"
52 #include "iscsi/tgt_node.h"
53 #include "iscsi/portal_grp.h"
54 
/*
 * Store the 32-bit value CRC32C into the four bytes starting at BUF, least
 * significant byte first. Both arguments are expanded several times, so they
 * must not have side effects.
 */
#define MAKE_DIGEST_WORD(BUF, CRC32C) \
        (   ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
            ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
            ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
            ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))

/*
 * Zero every byte of *conn from its `portal` member through the end of the
 * structure; members declared before `portal` keep their values.
 *
 * The expansion is a single expression without a trailing semicolon so the
 * macro can be used like a function call, including as the body of an
 * unbraced if/else.
 */
#define SPDK_ISCSI_CONNECTION_MEMSET(conn)		\
	memset(&(conn)->portal, 0, sizeof(*(conn)) -	\
		offsetof(struct spdk_iscsi_conn, portal))
64 
/* NOTE(review): not referenced in the visible part of this file; presumably a
 * per-core connection limit - confirm against the rest of the file. */
static int g_connections_per_lcore;
/* Per-core connection counters indexed by lcore; allocated in
 * spdk_initialize_iscsi_conns() with (last core + 1) entries. */
static uint32_t *g_num_connections;

/* Table of MAX_ISCSI_CONNECTIONS connection slots backed by a POSIX
 * shared-memory object; MAP_FAILED until spdk_initialize_iscsi_conns() maps it. */
struct spdk_iscsi_conn *g_conns_array = MAP_FAILED;
/* Descriptor of the shared-memory object behind g_conns_array, or -1. */
static int g_conns_array_fd = -1;
/* Name handed to shm_open()/shm_unlink() for the connection table. */
static char g_shm_name[64];

/* Taken while scanning g_conns_array for slots and while releasing a
 * connection. */
static pthread_mutex_t g_conns_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Poller registered by spdk_shutdown_iscsi_conns() that waits for every
 * connection slot to become unused. */
static struct spdk_poller *g_shutdown_timer = NULL;

/* Forward declarations for functions defined later in this file. */
static uint32_t spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask);

static void spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2);
static void spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn);
static void spdk_iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group,
				    struct spdk_sock *sock);
82 
83 static struct spdk_iscsi_conn *
84 allocate_conn(void)
85 {
86 	struct spdk_iscsi_conn	*conn;
87 	int				i;
88 
89 	pthread_mutex_lock(&g_conns_mutex);
90 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
91 		conn = &g_conns_array[i];
92 		if (!conn->is_valid) {
93 			SPDK_ISCSI_CONNECTION_MEMSET(conn);
94 			conn->is_valid = 1;
95 			pthread_mutex_unlock(&g_conns_mutex);
96 			return conn;
97 		}
98 	}
99 	pthread_mutex_unlock(&g_conns_mutex);
100 
101 	return NULL;
102 }
103 
104 static void
105 free_conn(struct spdk_iscsi_conn *conn)
106 {
107 	free(conn->portal_host);
108 	free(conn->portal_port);
109 	conn->is_valid = 0;
110 }
111 
112 static struct spdk_iscsi_conn *
113 spdk_find_iscsi_connection_by_id(int cid)
114 {
115 	if (g_conns_array[cid].is_valid == 1) {
116 		return &g_conns_array[cid];
117 	} else {
118 		return NULL;
119 	}
120 }
121 
122 int spdk_initialize_iscsi_conns(void)
123 {
124 	size_t conns_size = sizeof(struct spdk_iscsi_conn) * MAX_ISCSI_CONNECTIONS;
125 	uint32_t i, last_core;
126 
127 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init\n");
128 
129 	snprintf(g_shm_name, sizeof(g_shm_name), "/spdk_iscsi_conns.%d", spdk_app_get_shm_id());
130 	g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600);
131 	if (g_conns_array_fd < 0) {
132 		SPDK_ERRLOG("could not shm_open %s\n", g_shm_name);
133 		goto err;
134 	}
135 
136 	if (ftruncate(g_conns_array_fd, conns_size) != 0) {
137 		SPDK_ERRLOG("could not ftruncate\n");
138 		goto err;
139 	}
140 	g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED,
141 			     g_conns_array_fd, 0);
142 
143 	if (g_conns_array == MAP_FAILED) {
144 		fprintf(stderr, "could not mmap cons array file %s (%d)\n", g_shm_name, errno);
145 		goto err;
146 	}
147 
148 	memset(g_conns_array, 0, conns_size);
149 
150 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
151 		g_conns_array[i].id = i;
152 	}
153 
154 	last_core = spdk_env_get_last_core();
155 	g_num_connections = calloc(last_core + 1, sizeof(uint32_t));
156 	if (!g_num_connections) {
157 		SPDK_ERRLOG("Could not allocate array size=%u for g_num_connections\n",
158 			    last_core + 1);
159 		goto err;
160 	}
161 
162 	return 0;
163 
164 err:
165 	if (g_conns_array != MAP_FAILED) {
166 		munmap(g_conns_array, conns_size);
167 		g_conns_array = MAP_FAILED;
168 	}
169 
170 	if (g_conns_array_fd >= 0) {
171 		close(g_conns_array_fd);
172 		g_conns_array_fd = -1;
173 		shm_unlink(g_shm_name);
174 	}
175 
176 	return -1;
177 }
178 
179 static void
180 spdk_iscsi_poll_group_add_conn_sock(struct spdk_iscsi_conn *conn)
181 {
182 	struct spdk_iscsi_poll_group *poll_group;
183 	int rc;
184 
185 	assert(conn->lcore == spdk_env_get_current_core());
186 
187 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
188 
189 	rc = spdk_sock_group_add_sock(poll_group->sock_group, conn->sock, spdk_iscsi_conn_sock_cb, conn);
190 	if (rc < 0) {
191 		SPDK_ERRLOG("Failed to add sock=%p of conn=%p\n", conn->sock, conn);
192 	}
193 }
194 
195 static void
196 spdk_iscsi_poll_group_remove_conn_sock(struct spdk_iscsi_conn *conn)
197 {
198 	struct spdk_iscsi_poll_group *poll_group;
199 	int rc;
200 
201 	assert(conn->lcore == spdk_env_get_current_core());
202 
203 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
204 
205 	rc = spdk_sock_group_remove_sock(poll_group->sock_group, conn->sock);
206 	if (rc < 0) {
207 		SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn);
208 	}
209 }
210 
211 static void
212 spdk_iscsi_poll_group_add_conn(struct spdk_iscsi_conn *conn)
213 {
214 	struct spdk_iscsi_poll_group *poll_group;
215 
216 	assert(conn->lcore == spdk_env_get_current_core());
217 
218 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
219 
220 	conn->is_stopped = false;
221 	STAILQ_INSERT_TAIL(&poll_group->connections, conn, link);
222 	spdk_iscsi_poll_group_add_conn_sock(conn);
223 }
224 
225 static void
226 spdk_iscsi_poll_group_remove_conn(struct spdk_iscsi_conn *conn)
227 {
228 	struct spdk_iscsi_poll_group *poll_group;
229 
230 	assert(conn->lcore == spdk_env_get_current_core());
231 
232 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
233 
234 	conn->is_stopped = true;
235 	STAILQ_REMOVE(&poll_group->connections, conn, spdk_iscsi_conn, link);
236 }
237 
238 /**
239  * \brief Create an iSCSI connection from the given parameters and schedule it
240  *        on a reactor.
241  *
242  * \code
243  *
244  * # identify reactor where the new connections work item will be scheduled
245  * reactor = spdk_iscsi_conn_allocate_reactor()
246  * allocate spdk_iscsi_conn object
247  * initialize spdk_iscsi_conn object
248  * schedule iSCSI connection work item on reactor
249  *
250  * \endcode
251  */
252 int
253 spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal,
254 			  struct spdk_sock *sock)
255 {
256 	struct spdk_iscsi_conn *conn;
257 	int bufsize, i, rc;
258 
259 	conn = allocate_conn();
260 	if (conn == NULL) {
261 		SPDK_ERRLOG("Could not allocate connection.\n");
262 		return -1;
263 	}
264 
265 	pthread_mutex_lock(&g_spdk_iscsi.mutex);
266 	conn->timeout = g_spdk_iscsi.timeout;
267 	conn->nopininterval = g_spdk_iscsi.nopininterval;
268 	conn->nopininterval *= spdk_get_ticks_hz(); /* seconds to TSC */
269 	conn->nop_outstanding = false;
270 	conn->data_out_cnt = 0;
271 	conn->data_in_cnt = 0;
272 	pthread_mutex_unlock(&g_spdk_iscsi.mutex);
273 	conn->MaxRecvDataSegmentLength = 8192; /* RFC3720(12.12) */
274 
275 	conn->portal = portal;
276 	conn->pg_tag = portal->group->tag;
277 	conn->portal_host = strdup(portal->host);
278 	conn->portal_port = strdup(portal->port);
279 	conn->portal_cpumask = portal->cpumask;
280 	conn->sock = sock;
281 
282 	conn->state = ISCSI_CONN_STATE_INVALID;
283 	conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE;
284 	conn->ttt = 0;
285 
286 	conn->partial_text_parameter = NULL;
287 
288 	for (i = 0; i < MAX_CONNECTION_PARAMS; i++) {
289 		conn->conn_param_state_negotiated[i] = false;
290 	}
291 
292 	for (i = 0; i < MAX_SESSION_PARAMS; i++) {
293 		conn->sess_param_state_negotiated[i] = false;
294 	}
295 
296 	for (i = 0; i < DEFAULT_MAXR2T; i++) {
297 		conn->outstanding_r2t_tasks[i] = NULL;
298 	}
299 
300 	TAILQ_INIT(&conn->write_pdu_list);
301 	TAILQ_INIT(&conn->snack_pdu_list);
302 	TAILQ_INIT(&conn->queued_r2t_tasks);
303 	TAILQ_INIT(&conn->active_r2t_tasks);
304 	TAILQ_INIT(&conn->queued_datain_tasks);
305 	memset(&conn->open_lun_descs, 0, sizeof(conn->open_lun_descs));
306 
307 	rc = spdk_sock_getaddr(sock, conn->target_addr, sizeof conn->target_addr, NULL,
308 			       conn->initiator_addr, sizeof conn->initiator_addr, NULL);
309 	if (rc < 0) {
310 		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
311 		goto error_return;
312 	}
313 
314 	bufsize = 2 * 1024 * 1024;
315 	rc = spdk_sock_set_recvbuf(conn->sock, bufsize);
316 	if (rc != 0) {
317 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
318 	}
319 
320 	bufsize = 32 * 1024 * 1024 / g_spdk_iscsi.MaxConnections;
321 	if (bufsize > 2 * 1024 * 1024) {
322 		bufsize = 2 * 1024 * 1024;
323 	}
324 	rc = spdk_sock_set_sendbuf(conn->sock, bufsize);
325 	if (rc != 0) {
326 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
327 	}
328 
329 	/* set low water mark */
330 	rc = spdk_sock_set_recvlowat(conn->sock, 1);
331 	if (rc != 0) {
332 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
333 		goto error_return;
334 	}
335 
336 	/* set default params */
337 	rc = spdk_iscsi_conn_params_init(&conn->params);
338 	if (rc < 0) {
339 		SPDK_ERRLOG("iscsi_conn_params_init() failed\n");
340 		goto error_return;
341 	}
342 	conn->logout_timer = NULL;
343 	conn->shutdown_timer = NULL;
344 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Launching connection on acceptor thread\n");
345 	conn->pending_task_cnt = 0;
346 
347 	conn->lcore = spdk_env_get_current_core();
348 	__sync_fetch_and_add(&g_num_connections[conn->lcore], 1);
349 
350 	spdk_iscsi_poll_group_add_conn(conn);
351 	return 0;
352 
353 error_return:
354 	spdk_iscsi_param_free(conn->params);
355 	free_conn(conn);
356 	return -1;
357 }
358 
359 void
360 spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
361 {
362 	if (pdu->task) {
363 		if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) {
364 			if (pdu->task->scsi.offset > 0) {
365 				conn->data_in_cnt--;
366 				if (pdu->bhs.flags & ISCSI_DATAIN_STATUS) {
367 					/* Free the primary task after the last subtask done */
368 					conn->data_in_cnt--;
369 					spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
370 				}
371 			}
372 		} else if (pdu->bhs.opcode == ISCSI_OP_SCSI_RSP &&
373 			   pdu->task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
374 			if (pdu->task->scsi.offset > 0) {
375 				spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
376 			}
377 		}
378 		spdk_iscsi_task_put(pdu->task);
379 	}
380 	spdk_put_pdu(pdu);
381 }
382 
383 static int spdk_iscsi_conn_free_tasks(struct spdk_iscsi_conn *conn)
384 {
385 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
386 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
387 
388 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
389 		TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
390 		spdk_iscsi_conn_free_pdu(conn, pdu);
391 	}
392 
393 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
394 		TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
395 		if (pdu->task) {
396 			spdk_iscsi_task_put(pdu->task);
397 		}
398 		spdk_put_pdu(pdu);
399 	}
400 
401 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
402 		if (!iscsi_task->is_queued) {
403 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
404 			spdk_iscsi_task_put(iscsi_task);
405 		}
406 	}
407 
408 	if (conn->pending_task_cnt) {
409 		return -1;
410 	}
411 
412 	return 0;
413 }
414 
415 static void
416 _spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn)
417 {
418 	if (conn == NULL) {
419 		return;
420 	}
421 
422 	spdk_iscsi_param_free(conn->params);
423 
424 	/*
425 	 * Each connection pre-allocates its next PDU - make sure these get
426 	 *  freed here.
427 	 */
428 	spdk_put_pdu(conn->pdu_in_progress);
429 
430 	free_conn(conn);
431 }
432 
433 static void
434 spdk_iscsi_conn_cleanup_backend(struct spdk_iscsi_conn *conn)
435 {
436 	int rc;
437 	struct spdk_iscsi_tgt_node *target;
438 
439 	if (conn->sess->connections > 1) {
440 		/* connection specific cleanup */
441 	} else if (!g_spdk_iscsi.AllowDuplicateIsid) {
442 		/* clean up all tasks to all LUNs for session */
443 		target = conn->sess->target;
444 		if (target != NULL) {
445 			rc = spdk_iscsi_tgt_node_cleanup_luns(conn, target);
446 			if (rc < 0) {
447 				SPDK_ERRLOG("target abort failed\n");
448 			}
449 		}
450 	}
451 }
452 
453 static void
454 spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn)
455 {
456 	struct spdk_iscsi_sess *sess;
457 	int idx;
458 	uint32_t i;
459 
460 	pthread_mutex_lock(&g_conns_mutex);
461 
462 	if (conn->sess == NULL) {
463 		goto end;
464 	}
465 
466 	idx = -1;
467 	sess = conn->sess;
468 	conn->sess = NULL;
469 
470 	for (i = 0; i < sess->connections; i++) {
471 		if (sess->conns[i] == conn) {
472 			idx = i;
473 			break;
474 		}
475 	}
476 
477 	if (idx < 0) {
478 		SPDK_ERRLOG("remove conn not found\n");
479 	} else {
480 		for (i = idx; i < sess->connections - 1; i++) {
481 			sess->conns[i] = sess->conns[i + 1];
482 		}
483 		sess->conns[sess->connections - 1] = NULL;
484 		sess->connections--;
485 
486 		if (sess->connections == 0) {
487 			/* cleanup last connection */
488 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI,
489 				      "cleanup last conn free sess\n");
490 			spdk_free_sess(sess);
491 		}
492 	}
493 
494 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Terminating connections(tsih %d): %d\n",
495 		      sess->tsih, sess->connections);
496 
497 end:
498 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "cleanup free conn\n");
499 	_spdk_iscsi_conn_free(conn);
500 
501 	pthread_mutex_unlock(&g_conns_mutex);
502 }
503 
504 static int
505 _spdk_iscsi_conn_check_shutdown(void *arg)
506 {
507 	struct spdk_iscsi_conn *conn = arg;
508 	int rc;
509 
510 	rc = spdk_iscsi_conn_free_tasks(conn);
511 	if (rc < 0) {
512 		return 1;
513 	}
514 
515 	spdk_poller_unregister(&conn->shutdown_timer);
516 
517 	spdk_iscsi_conn_stop(conn);
518 	spdk_iscsi_conn_free(conn);
519 
520 	return 1;
521 }
522 
523 static void
524 _spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
525 {
526 	int rc;
527 
528 	spdk_clear_all_transfer_task(conn, NULL, NULL);
529 	spdk_iscsi_poll_group_remove_conn_sock(conn);
530 	spdk_sock_close(&conn->sock);
531 	spdk_poller_unregister(&conn->logout_timer);
532 	spdk_poller_unregister(&conn->flush_poller);
533 
534 	rc = spdk_iscsi_conn_free_tasks(conn);
535 	if (rc < 0) {
536 		/* The connection cannot be freed yet. Check back later. */
537 		conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_shutdown, conn, 1000);
538 	} else {
539 		spdk_iscsi_conn_stop(conn);
540 		spdk_iscsi_conn_free(conn);
541 	}
542 }
543 
544 static int
545 _spdk_iscsi_conn_check_pending_tasks(void *arg)
546 {
547 	struct spdk_iscsi_conn *conn = arg;
548 
549 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
550 		return 1;
551 	}
552 
553 	spdk_poller_unregister(&conn->shutdown_timer);
554 
555 	_spdk_iscsi_conn_destruct(conn);
556 
557 	return 1;
558 }
559 
560 void
561 spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
562 {
563 	/* If a connection is already in exited status, just return */
564 	if (conn->state >= ISCSI_CONN_STATE_EXITED) {
565 		return;
566 	}
567 
568 	conn->state = ISCSI_CONN_STATE_EXITED;
569 
570 	if (conn->sess != NULL && conn->pending_task_cnt > 0) {
571 		spdk_iscsi_conn_cleanup_backend(conn);
572 	}
573 
574 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
575 		conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_pending_tasks, conn, 1000);
576 	} else {
577 		_spdk_iscsi_conn_destruct(conn);
578 	}
579 }
580 
581 static int
582 spdk_iscsi_get_active_conns(void)
583 {
584 	struct spdk_iscsi_conn *conn;
585 	int num = 0;
586 	int i;
587 
588 	pthread_mutex_lock(&g_conns_mutex);
589 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
590 		conn = spdk_find_iscsi_connection_by_id(i);
591 		if (conn == NULL) {
592 			continue;
593 		}
594 		num++;
595 	}
596 	pthread_mutex_unlock(&g_conns_mutex);
597 	return num;
598 }
599 
600 static void
601 spdk_iscsi_conns_cleanup(void)
602 {
603 	free(g_num_connections);
604 	munmap(g_conns_array, sizeof(struct spdk_iscsi_conn) *
605 	       MAX_ISCSI_CONNECTIONS);
606 	shm_unlink(g_shm_name);
607 	if (g_conns_array_fd >= 0) {
608 		close(g_conns_array_fd);
609 		g_conns_array_fd = -1;
610 	}
611 }
612 
/*
 * Event callback run once no connection is active any more: releases the
 * global connection state and reports that the shutdown of the connections
 * has finished. Neither argument is used.
 */
static void
spdk_iscsi_conn_check_shutdown_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	spdk_iscsi_conns_cleanup();
	spdk_shutdown_iscsi_conns_done();
}
619 
620 static int
621 spdk_iscsi_conn_check_shutdown(void *arg)
622 {
623 	struct spdk_event *event;
624 
625 	if (spdk_iscsi_get_active_conns() != 0) {
626 		return 1;
627 	}
628 
629 	spdk_poller_unregister(&g_shutdown_timer);
630 	event = spdk_event_allocate(spdk_env_get_current_core(),
631 				    spdk_iscsi_conn_check_shutdown_cb, NULL, NULL);
632 	spdk_event_call(event);
633 
634 	return 1;
635 }
636 
637 static void
638 spdk_iscsi_conn_close_lun(struct spdk_iscsi_conn *conn, int lun_id)
639 {
640 	struct spdk_scsi_desc *desc;
641 
642 	desc = conn->open_lun_descs[lun_id];
643 	if (desc != NULL) {
644 		spdk_scsi_lun_free_io_channel(desc);
645 		spdk_scsi_lun_close(desc);
646 		conn->open_lun_descs[lun_id] = NULL;
647 	}
648 }
649 
650 static void
651 spdk_iscsi_conn_close_luns(struct spdk_iscsi_conn *conn)
652 {
653 	int i;
654 
655 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
656 		spdk_iscsi_conn_close_lun(conn, i);
657 	}
658 }
659 
660 static void
661 _iscsi_conn_remove_lun(void *arg1, void *arg2)
662 {
663 	struct spdk_iscsi_conn *conn = arg1;
664 	struct spdk_scsi_lun *lun = arg2;
665 	int lun_id = spdk_scsi_lun_get_id(lun);
666 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
667 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
668 
669 	/* If a connection is already in stating status, just return */
670 	if (conn->state >= ISCSI_CONN_STATE_EXITING) {
671 		return;
672 	}
673 
674 	spdk_clear_all_transfer_task(conn, lun, NULL);
675 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
676 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
677 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
678 			spdk_iscsi_conn_free_pdu(conn, pdu);
679 		}
680 	}
681 
682 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
683 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
684 			TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
685 			spdk_iscsi_task_put(pdu->task);
686 			spdk_put_pdu(pdu);
687 		}
688 	}
689 
690 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
691 		if ((!iscsi_task->is_queued) && (lun == iscsi_task->scsi.lun)) {
692 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
693 			spdk_iscsi_task_put(iscsi_task);
694 		}
695 	}
696 
697 	spdk_iscsi_conn_close_lun(conn, lun_id);
698 }
699 
700 static void
701 spdk_iscsi_conn_remove_lun(struct spdk_scsi_lun *lun, void *remove_ctx)
702 {
703 	struct spdk_iscsi_conn *conn = remove_ctx;
704 	struct spdk_event *event;
705 
706 	event = spdk_event_allocate(conn->lcore, _iscsi_conn_remove_lun,
707 				    conn, lun);
708 	spdk_event_call(event);
709 }
710 
711 static void
712 spdk_iscsi_conn_open_luns(struct spdk_iscsi_conn *conn)
713 {
714 	int i, rc;
715 	struct spdk_scsi_lun *lun;
716 	struct spdk_scsi_desc *desc;
717 
718 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
719 		lun = spdk_scsi_dev_get_lun(conn->dev, i);
720 		if (lun == NULL) {
721 			continue;
722 		}
723 
724 		rc = spdk_scsi_lun_open(lun, spdk_iscsi_conn_remove_lun, conn, &desc);
725 		if (rc != 0) {
726 			goto error;
727 		}
728 
729 		rc = spdk_scsi_lun_allocate_io_channel(desc);
730 		if (rc != 0) {
731 			spdk_scsi_lun_close(desc);
732 			goto error;
733 		}
734 
735 		conn->open_lun_descs[i] = desc;
736 	}
737 
738 	return;
739 
740 error:
741 	spdk_iscsi_conn_close_luns(conn);
742 }
743 
744 /**
745  *  This function will stop executing the specified connection.
746  */
747 static void
748 spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn)
749 {
750 	struct spdk_iscsi_tgt_node *target;
751 
752 	if (conn->state == ISCSI_CONN_STATE_EXITED && conn->sess != NULL &&
753 	    conn->sess->session_type == SESSION_TYPE_NORMAL &&
754 	    conn->full_feature) {
755 		target = conn->sess->target;
756 		pthread_mutex_lock(&target->mutex);
757 		target->num_active_conns--;
758 		pthread_mutex_unlock(&target->mutex);
759 
760 		spdk_iscsi_conn_close_luns(conn);
761 	}
762 
763 	assert(conn->lcore == spdk_env_get_current_core());
764 
765 	__sync_fetch_and_sub(&g_num_connections[conn->lcore], 1);
766 	spdk_iscsi_poll_group_remove_conn(conn);
767 }
768 
769 void spdk_shutdown_iscsi_conns(void)
770 {
771 	struct spdk_iscsi_conn	*conn;
772 	int			i;
773 
774 	pthread_mutex_lock(&g_conns_mutex);
775 
776 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
777 		conn = spdk_find_iscsi_connection_by_id(i);
778 		if (conn == NULL) {
779 			continue;
780 		}
781 
782 		/* Do not set conn->state if the connection has already started exiting.
783 		  * This ensures we do not move a connection from EXITED state back to EXITING.
784 		  */
785 		if (conn->state < ISCSI_CONN_STATE_EXITING) {
786 			conn->state = ISCSI_CONN_STATE_EXITING;
787 		}
788 	}
789 
790 	pthread_mutex_unlock(&g_conns_mutex);
791 	g_shutdown_timer = spdk_poller_register(spdk_iscsi_conn_check_shutdown, NULL,
792 						1000);
793 }
794 
795 int
796 spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, const char *conn_match,
797 		      int drop_all)
798 {
799 	struct spdk_iscsi_conn	*xconn;
800 	const char			*xconn_match;
801 	int				i, num;
802 
803 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_drop_conns\n");
804 
805 	num = 0;
806 	pthread_mutex_lock(&g_conns_mutex);
807 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
808 		xconn = spdk_find_iscsi_connection_by_id(i);
809 
810 		if (xconn == NULL) {
811 			continue;
812 		}
813 
814 		if (xconn == conn) {
815 			continue;
816 		}
817 
818 		if (!drop_all && xconn->initiator_port == NULL) {
819 			continue;
820 		}
821 
822 		xconn_match =
823 			drop_all ? xconn->initiator_name : spdk_scsi_port_get_name(xconn->initiator_port);
824 
825 		if (!strcasecmp(conn_match, xconn_match) &&
826 		    conn->target == xconn->target) {
827 
828 			if (num == 0) {
829 				/*
830 				 * Only print this message before we report the
831 				 *  first dropped connection.
832 				 */
833 				SPDK_ERRLOG("drop old connections %s by %s\n",
834 					    conn->target->name, conn_match);
835 			}
836 
837 			SPDK_ERRLOG("exiting conn by %s (%s)\n",
838 				    xconn_match, xconn->initiator_addr);
839 			if (xconn->sess != NULL) {
840 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=%u\n", xconn->sess->tsih);
841 			} else {
842 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=xx\n");
843 			}
844 
845 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CID=%u\n", xconn->cid);
846 
847 			/* Do not set xconn->state if the connection has already started exiting.
848 			  * This ensures we do not move a connection from EXITED state back to EXITING.
849 			  */
850 			if (xconn->state < ISCSI_CONN_STATE_EXITING) {
851 				xconn->state = ISCSI_CONN_STATE_EXITING;
852 			}
853 			num++;
854 		}
855 	}
856 
857 	pthread_mutex_unlock(&g_conns_mutex);
858 
859 	if (num != 0) {
860 		SPDK_ERRLOG("exiting %d conns\n", num);
861 	}
862 
863 	return 0;
864 }
865 
866 /**
867  * \brief Reads data for the specified iSCSI connection from its TCP socket.
868  *
869  * The TCP socket is marked as non-blocking, so this function may not read
870  * all data requested.
871  *
872  * Returns SPDK_ISCSI_CONNECTION_FATAL if the recv() operation indicates a fatal
873  * error with the TCP connection (including if the TCP connection was closed
874  * unexpectedly.
875  *
876  * Otherwise returns the number of bytes successfully read.
877  */
878 int
879 spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int bytes,
880 			  void *buf)
881 {
882 	int ret;
883 
884 	if (bytes == 0) {
885 		return 0;
886 	}
887 
888 	ret = spdk_sock_recv(conn->sock, buf, bytes);
889 
890 	if (ret > 0) {
891 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
892 		return ret;
893 	}
894 
895 	if (ret < 0) {
896 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
897 			return 0;
898 		}
899 
900 		/* For connect reset issue, do not output error log */
901 		if (errno == ECONNRESET) {
902 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_recv() failed, errno %d: %s\n",
903 				      errno, spdk_strerror(errno));
904 		} else {
905 			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
906 				    errno, spdk_strerror(errno));
907 		}
908 	}
909 
910 	/* connection closed */
911 	return SPDK_ISCSI_CONNECTION_FATAL;
912 }
913 
914 int
915 spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn,
916 			   struct iovec *iov, int iovcnt)
917 {
918 	int ret;
919 
920 	if (iov == NULL || iovcnt == 0) {
921 		return 0;
922 	}
923 
924 	if (iovcnt == 1) {
925 		return spdk_iscsi_conn_read_data(conn, iov[0].iov_len,
926 						 iov[0].iov_base);
927 	}
928 
929 	ret = spdk_sock_readv(conn->sock, iov, iovcnt);
930 
931 	if (ret > 0) {
932 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
933 		return ret;
934 	}
935 
936 	if (ret < 0) {
937 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
938 			return 0;
939 		}
940 
941 		/* For connect reset issue, do not output error log */
942 		if (errno == ECONNRESET) {
943 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_readv() failed, errno %d: %s\n",
944 				      errno, spdk_strerror(errno));
945 		} else {
946 			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
947 				    errno, spdk_strerror(errno));
948 		}
949 	}
950 
951 	/* connection closed */
952 	return SPDK_ISCSI_CONNECTION_FATAL;
953 }
954 
955 void
956 spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task)
957 {
958 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
959 
960 	spdk_iscsi_task_mgmt_response(task->conn, task);
961 	spdk_iscsi_task_put(task);
962 }
963 
964 static void
965 spdk_iscsi_task_copy_to_rsp_scsi_status(struct spdk_iscsi_task *primary,
966 					struct spdk_scsi_task *task)
967 {
968 	memcpy(primary->rsp_sense_data, task->sense_data, task->sense_data_len);
969 	primary->rsp_sense_data_len = task->sense_data_len;
970 	primary->rsp_scsi_status = task->status;
971 }
972 
973 static void
974 spdk_iscsi_task_copy_from_rsp_scsi_status(struct spdk_scsi_task *task,
975 		struct spdk_iscsi_task *primary)
976 {
977 	memcpy(task->sense_data, primary->rsp_sense_data,
978 	       primary->rsp_sense_data_len);
979 	task->sense_data_len = primary->rsp_sense_data_len;
980 	task->status = primary->rsp_scsi_status;
981 }
982 
983 static void
984 process_completed_read_subtask_list(struct spdk_iscsi_conn *conn,
985 				    struct spdk_iscsi_task *primary)
986 {
987 	struct spdk_iscsi_task *subtask, *tmp;
988 
989 	TAILQ_FOREACH_SAFE(subtask, &primary->subtask_list, subtask_link, tmp) {
990 		if (subtask->scsi.offset == primary->bytes_completed) {
991 			TAILQ_REMOVE(&primary->subtask_list, subtask, subtask_link);
992 			primary->bytes_completed += subtask->scsi.length;
993 			spdk_iscsi_task_response(conn, subtask);
994 			spdk_iscsi_task_put(subtask);
995 		} else {
996 			break;
997 		}
998 	}
999 }
1000 
1001 static void
1002 process_read_task_completion(struct spdk_iscsi_conn *conn,
1003 			     struct spdk_iscsi_task *task,
1004 			     struct spdk_iscsi_task *primary)
1005 {
1006 	struct spdk_iscsi_task *tmp;
1007 
1008 	/* If the status of the completed subtask is the first failure,
1009 	 * copy it to out-of-order subtasks and remember it as the status
1010 	 * of the command,
1011 	 *
1012 	 * Even if the status of the completed task is success,
1013 	 * there are any failed subtask ever, copy the first failed status
1014 	 * to it.
1015 	 */
1016 	if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
1017 		if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1018 			TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
1019 				spdk_scsi_task_copy_status(&tmp->scsi, &task->scsi);
1020 			}
1021 			spdk_iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1022 		}
1023 	} else if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1024 		spdk_iscsi_task_copy_from_rsp_scsi_status(&task->scsi, primary);
1025 	}
1026 
1027 	if ((task != primary) &&
1028 	    (task->scsi.offset != primary->bytes_completed)) {
1029 		TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
1030 			if (task->scsi.offset < tmp->scsi.offset) {
1031 				TAILQ_INSERT_BEFORE(tmp, task, subtask_link);
1032 				return;
1033 			}
1034 		}
1035 
1036 		TAILQ_INSERT_TAIL(&primary->subtask_list, task, subtask_link);
1037 		return;
1038 	}
1039 
1040 	primary->bytes_completed += task->scsi.length;
1041 	spdk_iscsi_task_response(conn, task);
1042 
1043 	if ((task != primary) ||
1044 	    (task->scsi.transfer_len == task->scsi.length)) {
1045 		spdk_iscsi_task_put(task);
1046 	}
1047 	process_completed_read_subtask_list(conn, primary);
1048 
1049 	spdk_iscsi_conn_handle_queued_datain_tasks(conn);
1050 }
1051 
1052 void
1053 spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task)
1054 {
1055 	struct spdk_iscsi_task *primary;
1056 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
1057 	struct spdk_iscsi_conn *conn = task->conn;
1058 	struct spdk_iscsi_pdu *pdu = task->pdu;
1059 
1060 	spdk_trace_record(TRACE_ISCSI_TASK_DONE, conn->id, 0, (uintptr_t)task, 0);
1061 
1062 	task->is_queued = false;
1063 	primary = spdk_iscsi_task_get_primary(task);
1064 
1065 	if (spdk_iscsi_task_is_read(primary)) {
1066 		process_read_task_completion(conn, task, primary);
1067 	} else {
1068 		primary->bytes_completed += task->scsi.length;
1069 
1070 		/* If the status of the subtask is the first failure, remember it as
1071 		 * the status of the command and set it to the status of the primary
1072 		 * task later.
1073 		 *
1074 		 * If the first failed task is the primary, two copies can be avoided
1075 		 * but code simplicity is prioritized.
1076 		 */
1077 		if (task->scsi.status == SPDK_SCSI_STATUS_GOOD) {
1078 			if (task != primary) {
1079 				primary->scsi.data_transferred += task->scsi.data_transferred;
1080 			}
1081 		} else if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1082 			spdk_iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1083 		}
1084 
1085 		if (primary->bytes_completed == primary->scsi.transfer_len) {
1086 			spdk_del_transfer_task(conn, primary->tag);
1087 			if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1088 				spdk_iscsi_task_copy_from_rsp_scsi_status(&primary->scsi, primary);
1089 			}
1090 			spdk_iscsi_task_response(conn, primary);
1091 			/*
1092 			 * Check if this is the last task completed for an iSCSI write
1093 			 *  that required child subtasks.  If task != primary, we know
1094 			 *  for sure that it was part of an iSCSI write with child subtasks.
1095 			 *  The trickier case is when the last task completed was the initial
1096 			 *  task - in this case the task will have a smaller length than
1097 			 *  the overall transfer length.
1098 			 */
1099 			if (task != primary || task->scsi.length != task->scsi.transfer_len) {
1100 				TAILQ_REMOVE(&conn->active_r2t_tasks, primary, link);
1101 				spdk_iscsi_task_put(primary);
1102 			}
1103 		}
1104 		spdk_iscsi_task_put(task);
1105 	}
1106 	if (!task->parent) {
1107 		spdk_trace_record(TRACE_ISCSI_PDU_COMPLETED, 0, 0, (uintptr_t)pdu, 0);
1108 	}
1109 }
1110 
1111 static int
1112 spdk_iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest,
1113 			  int data_digest)
1114 {
1115 	int data_len, enable_digest, total;
1116 
1117 	enable_digest = 1;
1118 	if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) {
1119 		enable_digest = 0;
1120 	}
1121 
1122 	total = ISCSI_BHS_LEN;
1123 
1124 	total += (4 * pdu->bhs.total_ahs_len);
1125 
1126 	if (enable_digest && header_digest) {
1127 		total += ISCSI_DIGEST_LEN;
1128 	}
1129 
1130 	data_len = DGET24(pdu->bhs.data_segment_len);
1131 	if (data_len > 0) {
1132 		total += ISCSI_ALIGN(data_len);
1133 		if (enable_digest && data_digest) {
1134 			total += ISCSI_DIGEST_LEN;
1135 		}
1136 	}
1137 
1138 	return total;
1139 }
1140 
1141 void
1142 spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn)
1143 {
1144 	uint64_t	tsc;
1145 
1146 	/**
1147 	  * This function will be executed by nop_poller of iSCSI polling group, so
1148 	  * we need to check the connection state first, then do the nop interval
1149 	  * expiration check work.
1150 	  */
1151 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1152 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1153 		return;
1154 	}
1155 
1156 	/* Check for nop interval expiration */
1157 	tsc = spdk_get_ticks();
1158 	if (conn->nop_outstanding) {
1159 		if ((tsc - conn->last_nopin) > (conn->timeout  * spdk_get_ticks_hz())) {
1160 			SPDK_ERRLOG("Timed out waiting for NOP-Out response from initiator\n");
1161 			SPDK_ERRLOG("  tsc=0x%lx, last_nopin=0x%lx\n", tsc, conn->last_nopin);
1162 			SPDK_ERRLOG("  initiator=%s, target=%s\n", conn->initiator_name,
1163 				    conn->target_short_name);
1164 			conn->state = ISCSI_CONN_STATE_EXITING;
1165 		}
1166 	} else if (tsc - conn->last_nopin > conn->nopininterval) {
1167 		spdk_iscsi_send_nopin(conn);
1168 	}
1169 }
1170 
1171 /**
1172  * \brief Makes one attempt to flush response PDUs back to the initiator.
1173  *
1174  * Builds a list of iovecs for response PDUs that must be sent back to the
1175  * initiator and passes it to writev().
1176  *
1177  * Since the socket is non-blocking, writev() may not be able to flush all
1178  * of the iovecs, and may even partially flush one of the iovecs.  In this
1179  * case, the partially flushed PDU will remain on the write_pdu_list with
1180  * an offset pointing to the next byte to be flushed.
1181  *
1182  * Returns 0 if all PDUs were flushed.
1183  *
1184  * Returns 1 if some PDUs could not be flushed due to lack of send buffer
1185  * space.
1186  *
1187  * Returns -1 if an exception error occurred indicating the TCP connection
1188  * should be closed.
1189  */
1190 static int
1191 spdk_iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn)
1192 {
1193 	const int num_iovs = 32;
1194 	struct iovec iovs[num_iovs];
1195 	struct iovec *iov = iovs;
1196 	int iovcnt = 0;
1197 	int bytes = 0;
1198 	uint32_t total_length = 0;
1199 	uint32_t mapped_length = 0;
1200 	struct spdk_iscsi_pdu *pdu;
1201 	int pdu_length;
1202 
1203 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1204 
1205 	if (pdu == NULL) {
1206 		return 0;
1207 	}
1208 
1209 	/*
1210 	 * Build up a list of iovecs for the first few PDUs in the
1211 	 *  connection's write_pdu_list. For the first PDU, check if it was
1212 	 *  partially written out the last time this function was called, and
1213 	 *  if so adjust the iovec array accordingly. This check is done in
1214 	 *  spdk_iscsi_build_iovs() and so applied to remaining PDUs too.
1215 	 *  But extra overhead is negligible.
1216 	 */
1217 	while (pdu != NULL && ((num_iovs - iovcnt) > 0)) {
1218 		iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt,
1219 						pdu, &mapped_length);
1220 		total_length += mapped_length;
1221 		pdu = TAILQ_NEXT(pdu, tailq);
1222 	}
1223 
1224 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt);
1225 
1226 	bytes = spdk_sock_writev(conn->sock, iov, iovcnt);
1227 	if (bytes == -1) {
1228 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
1229 			return 1;
1230 		} else {
1231 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
1232 				    errno, spdk_strerror(errno));
1233 			return -1;
1234 		}
1235 	}
1236 
1237 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0);
1238 
1239 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1240 
1241 	/*
1242 	 * Free any PDUs that were fully written.  If a PDU was only
1243 	 *  partially written, update its writev_offset so that next
1244 	 *  time only the unwritten portion will be sent to writev().
1245 	 */
1246 	while (bytes > 0) {
1247 		pdu_length = spdk_iscsi_get_pdu_length(pdu,
1248 						       conn->header_digest,
1249 						       conn->data_digest);
1250 		pdu_length -= pdu->writev_offset;
1251 
1252 		if (bytes >= pdu_length) {
1253 			bytes -= pdu_length;
1254 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
1255 
1256 			if ((conn->full_feature) &&
1257 			    (conn->sess->ErrorRecoveryLevel >= 1) &&
1258 			    spdk_iscsi_is_deferred_free_pdu(pdu)) {
1259 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n",
1260 					      from_be32(&pdu->bhs.stat_sn));
1261 				TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu,
1262 						  tailq);
1263 			} else {
1264 				spdk_iscsi_conn_free_pdu(conn, pdu);
1265 			}
1266 
1267 			pdu = TAILQ_FIRST(&conn->write_pdu_list);
1268 		} else {
1269 			pdu->writev_offset += bytes;
1270 			bytes = 0;
1271 		}
1272 	}
1273 
1274 	return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1;
1275 }
1276 
1277 /**
1278  * \brief Flushes response PDUs back to the initiator.
1279  *
1280  * This function may return without all PDUs having flushed to the
1281  * underlying TCP socket buffer - for example, in the case where the
1282  * socket buffer is already full.
1283  *
1284  * During normal RUNNING connection state, if not all PDUs are flushed,
1285  * then subsequent calls to this routine will eventually flush
1286  * remaining PDUs.
1287  *
1288  * During other connection states (EXITING or LOGGED_OUT), this
1289  * function will spin until all PDUs have successfully been flushed.
1290  */
1291 static int
1292 spdk_iscsi_conn_flush_pdus(void *_conn)
1293 {
1294 	struct spdk_iscsi_conn *conn = _conn;
1295 	int rc;
1296 
1297 	if (conn->state == ISCSI_CONN_STATE_RUNNING) {
1298 		rc = spdk_iscsi_conn_flush_pdus_internal(conn);
1299 		if (rc == 0 && conn->flush_poller != NULL) {
1300 			spdk_poller_unregister(&conn->flush_poller);
1301 		} else if (rc == 1 && conn->flush_poller == NULL) {
1302 			conn->flush_poller = spdk_poller_register(spdk_iscsi_conn_flush_pdus,
1303 					     conn, 50);
1304 		}
1305 	} else {
1306 		/*
1307 		 * If the connection state is not RUNNING, then
1308 		 * keep trying to flush PDUs until our list is
1309 		 * empty - to make sure all data is sent before
1310 		 * closing the connection.
1311 		 */
1312 		do {
1313 			rc = spdk_iscsi_conn_flush_pdus_internal(conn);
1314 		} while (rc == 1);
1315 	}
1316 
1317 	if (rc < 0 && conn->state < ISCSI_CONN_STATE_EXITING) {
1318 		/*
1319 		 * If the poller has already started destruction of the connection,
1320 		 *  i.e. the socket read failed, then the connection state may already
1321 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
1322 		 */
1323 		conn->state = ISCSI_CONN_STATE_EXITING;
1324 	}
1325 
1326 	return 1;
1327 }
1328 
1329 static int
1330 spdk_iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx)
1331 {
1332 	struct iovec iov;
1333 	struct spdk_dif_error err_blk = {};
1334 	uint32_t num_blocks;
1335 	int rc;
1336 
1337 	iov.iov_base = pdu->data;
1338 	iov.iov_len = pdu->data_buf_len;
1339 	num_blocks = pdu->data_buf_len / dif_ctx->block_size;
1340 
1341 	rc = spdk_dif_verify(&iov, 1, num_blocks, dif_ctx, &err_blk);
1342 	if (rc != 0) {
1343 		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
1344 			    err_blk.err_type, err_blk.err_offset);
1345 	}
1346 
1347 	return rc;
1348 }
1349 
1350 void
1351 spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
1352 {
1353 	uint32_t crc32c;
1354 	int rc;
1355 
1356 	if (spdk_unlikely(spdk_iscsi_get_dif_ctx(conn, pdu, &pdu->dif_ctx))) {
1357 		rc = spdk_iscsi_dif_verify(pdu, &pdu->dif_ctx);
1358 		if (rc != 0) {
1359 			spdk_iscsi_conn_free_pdu(conn, pdu);
1360 			conn->state = ISCSI_CONN_STATE_EXITING;
1361 			return;
1362 		}
1363 		pdu->dif_insert_or_strip = true;
1364 	}
1365 
1366 	if (pdu->bhs.opcode != ISCSI_OP_LOGIN_RSP) {
1367 		/* Header Digest */
1368 		if (conn->header_digest) {
1369 			crc32c = spdk_iscsi_pdu_calc_header_digest(pdu);
1370 			MAKE_DIGEST_WORD(pdu->header_digest, crc32c);
1371 		}
1372 
1373 		/* Data Digest */
1374 		if (conn->data_digest && DGET24(pdu->bhs.data_segment_len) != 0) {
1375 			crc32c = spdk_iscsi_pdu_calc_data_digest(pdu);
1376 			MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
1377 		}
1378 	}
1379 
1380 	TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq);
1381 	spdk_iscsi_conn_flush_pdus(conn);
1382 }
1383 
1384 #define GET_PDU_LOOP_COUNT	16
1385 
1386 static int
1387 spdk_iscsi_conn_handle_incoming_pdus(struct spdk_iscsi_conn *conn)
1388 {
1389 	struct spdk_iscsi_pdu *pdu;
1390 	int i, rc;
1391 
1392 	/* Read new PDUs from network */
1393 	for (i = 0; i < GET_PDU_LOOP_COUNT; i++) {
1394 		rc = spdk_iscsi_read_pdu(conn, &pdu);
1395 		if (rc == 0) {
1396 			break;
1397 		} else if (rc == SPDK_ISCSI_CONNECTION_FATAL) {
1398 			return rc;
1399 		}
1400 
1401 		if (conn->state == ISCSI_CONN_STATE_LOGGED_OUT) {
1402 			SPDK_ERRLOG("pdu received after logout\n");
1403 			spdk_put_pdu(pdu);
1404 			return SPDK_ISCSI_CONNECTION_FATAL;
1405 		}
1406 
1407 		rc = spdk_iscsi_execute(conn, pdu);
1408 		spdk_put_pdu(pdu);
1409 		if (rc != 0) {
1410 			SPDK_ERRLOG("spdk_iscsi_execute() fatal error on %s(%s)\n",
1411 				    conn->target_port != NULL ? spdk_scsi_port_get_name(conn->target_port) : "NULL",
1412 				    conn->initiator_port != NULL ? spdk_scsi_port_get_name(conn->initiator_port) : "NULL");
1413 			return rc;
1414 		}
1415 
1416 		spdk_trace_record(TRACE_ISCSI_TASK_EXECUTED, 0, 0, (uintptr_t)pdu, 0);
1417 		if (conn->is_stopped) {
1418 			break;
1419 		}
1420 	}
1421 
1422 	return i;
1423 }
1424 
1425 static void
1426 spdk_iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
1427 {
1428 	struct spdk_iscsi_conn *conn = arg;
1429 	int rc;
1430 
1431 	assert(conn != NULL);
1432 
1433 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1434 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1435 		return;
1436 	}
1437 
1438 	/* Handle incoming PDUs */
1439 	rc = spdk_iscsi_conn_handle_incoming_pdus(conn);
1440 	if (rc < 0) {
1441 		conn->state = ISCSI_CONN_STATE_EXITING;
1442 		spdk_iscsi_conn_flush_pdus(conn);
1443 	}
1444 }
1445 
1446 static void
1447 spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2)
1448 {
1449 	struct spdk_iscsi_conn *conn = arg1;
1450 
1451 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1452 		spdk_iscsi_conn_open_luns(conn);
1453 	}
1454 
1455 	/* The poller has been unregistered, so now we can re-register it on the new core. */
1456 	conn->lcore = spdk_env_get_current_core();
1457 	spdk_iscsi_poll_group_add_conn(conn);
1458 }
1459 
1460 void
1461 spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn)
1462 {
1463 	int				lcore;
1464 	struct spdk_event		*event;
1465 	struct spdk_iscsi_tgt_node *target;
1466 
1467 	lcore = spdk_iscsi_conn_allocate_reactor(conn->portal->cpumask);
1468 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1469 		target = conn->sess->target;
1470 		pthread_mutex_lock(&target->mutex);
1471 		target->num_active_conns++;
1472 		if (target->num_active_conns == 1) {
1473 			/**
1474 			 * This is the only active connection for this target node.
1475 			 *  Save the lcore in the target node so it can be used for
1476 			 *  any other connections to this target node.
1477 			 */
1478 			target->lcore = lcore;
1479 		} else {
1480 			/**
1481 			 * There are other active connections for this target node.
1482 			 *  Ignore the lcore specified by the allocator and use the
1483 			 *  the target node's lcore to ensure this connection runs on
1484 			 *  the same lcore as other connections for this target node.
1485 			 */
1486 			lcore = target->lcore;
1487 		}
1488 		pthread_mutex_unlock(&target->mutex);
1489 	}
1490 
1491 	spdk_iscsi_poll_group_remove_conn_sock(conn);
1492 	spdk_poller_unregister(&conn->flush_poller);
1493 	spdk_iscsi_conn_stop(conn);
1494 
1495 	__sync_fetch_and_add(&g_num_connections[lcore], 1);
1496 	conn->last_nopin = spdk_get_ticks();
1497 	event = spdk_event_allocate(lcore, spdk_iscsi_conn_full_feature_migrate,
1498 				    conn, NULL);
1499 	spdk_event_call(event);
1500 }
1501 
1502 void
1503 spdk_iscsi_conn_set_min_per_core(int count)
1504 {
1505 	g_connections_per_lcore = count;
1506 }
1507 
1508 int
1509 spdk_iscsi_conn_get_min_per_core(void)
1510 {
1511 	return g_connections_per_lcore;
1512 }
1513 
1514 static uint32_t
1515 spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask)
1516 {
1517 	uint32_t i, selected_core;
1518 	int32_t num_pollers, min_pollers;
1519 
1520 	min_pollers = INT_MAX;
1521 	selected_core = spdk_env_get_first_core();
1522 
1523 	SPDK_ENV_FOREACH_CORE(i) {
1524 		if (!spdk_cpuset_get_cpu(cpumask, i)) {
1525 			continue;
1526 		}
1527 
1528 		/* This core is running. Check how many pollers it already has. */
1529 		num_pollers = g_num_connections[i];
1530 
1531 		if ((num_pollers > 0) && (num_pollers < g_connections_per_lcore)) {
1532 			/* Fewer than the maximum connections per core,
1533 			 * but at least 1. Use this core.
1534 			 */
1535 			return i;
1536 		} else if (num_pollers < min_pollers) {
1537 			/* Track the core that has the minimum number of pollers
1538 			 * to be used if no cores meet our criteria
1539 			 */
1540 			selected_core = i;
1541 			min_pollers = num_pollers;
1542 		}
1543 	}
1544 
1545 	return selected_core;
1546 }
1547 
/*
 * Poller callback registered by spdk_iscsi_conn_logout().
 *
 * arg is the connection, which is destructed.  Always returns -1.
 */
static int
logout_timeout(void *arg)
{
	struct spdk_iscsi_conn *logged_out_conn = arg;

	spdk_iscsi_conn_destruct(logged_out_conn);
	return -1;
}
1557 
1558 void
1559 spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn)
1560 {
1561 	conn->state = ISCSI_CONN_STATE_LOGGED_OUT;
1562 	conn->logout_timer = spdk_poller_register(logout_timeout, conn, ISCSI_LOGOUT_TIMEOUT * 1000000);
1563 }
1564 
1565 SPDK_TRACE_REGISTER_FN(iscsi_conn_trace, "iscsi_conn", TRACE_GROUP_ISCSI)
1566 {
1567 	spdk_trace_register_owner(OWNER_ISCSI_CONN, 'c');
1568 	spdk_trace_register_object(OBJECT_ISCSI_PDU, 'p');
1569 	spdk_trace_register_description("ISCSI_READ_FROM_SOCKET_DONE", "",
1570 					TRACE_ISCSI_READ_FROM_SOCKET_DONE,
1571 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1572 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_START", "", TRACE_ISCSI_FLUSH_WRITEBUF_START,
1573 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "iovec: ");
1574 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_DONE", "", TRACE_ISCSI_FLUSH_WRITEBUF_DONE,
1575 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1576 	spdk_trace_register_description("ISCSI_READ_PDU", "", TRACE_ISCSI_READ_PDU,
1577 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 1, 0, "opc:   ");
1578 	spdk_trace_register_description("ISCSI_TASK_DONE", "", TRACE_ISCSI_TASK_DONE,
1579 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 0, 0, "");
1580 	spdk_trace_register_description("ISCSI_TASK_QUEUE", "", TRACE_ISCSI_TASK_QUEUE,
1581 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 1, 1, "pdu:   ");
1582 	spdk_trace_register_description("ISCSI_TASK_EXECUTED", "", TRACE_ISCSI_TASK_EXECUTED,
1583 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1584 	spdk_trace_register_description("ISCSI_PDU_COMPLETED", "", TRACE_ISCSI_PDU_COMPLETED,
1585 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1586 }
1587