xref: /spdk/lib/iscsi/conn.c (revision f93b6fb0a4ebcee203e7c44c9e170c20bbce96cc)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
5  *   Copyright (c) Intel Corporation.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/endian.h"
38 #include "spdk/env.h"
39 #include "spdk/event.h"
40 #include "spdk/likely.h"
41 #include "spdk/thread.h"
42 #include "spdk/queue.h"
43 #include "spdk/trace.h"
44 #include "spdk/net.h"
45 #include "spdk/sock.h"
46 #include "spdk/string.h"
47 
48 #include "spdk_internal/log.h"
49 
50 #include "iscsi/task.h"
51 #include "iscsi/conn.h"
52 #include "iscsi/tgt_node.h"
53 #include "iscsi/portal_grp.h"
54 
/*
 * Store the 32-bit value CRC32C into the four bytes starting at BUF,
 * least-significant byte first (byte 0 receives bits 0-7, byte 3 receives
 * bits 24-31).  Both arguments are evaluated more than once.
 */
#define MAKE_DIGEST_WORD(BUF, CRC32C) \
        (   ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
            ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
            ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
            ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))

/*
 * Zero a connection object starting at its 'portal' member and continuing
 * to the end of the structure; members laid out before 'portal' are left
 * untouched.  Note that the expansion already ends with a semicolon.
 */
#define SPDK_ISCSI_CONNECTION_MEMSET(conn)		\
	memset(&(conn)->portal, 0, sizeof(*(conn)) -	\
		offsetof(struct spdk_iscsi_conn, portal));
64 
/* NOTE(review): not referenced in the visible part of this file. */
static int g_connections_per_lcore;
/*
 * Array of per-core connection counters, indexed by lcore.  Allocated in
 * spdk_initialize_iscsi_conns() with (last core + 1) entries.
 */
static uint32_t *g_num_connections;

/*
 * Table of MAX_ISCSI_CONNECTIONS connection slots, mapped from a POSIX
 * shared-memory object.  Holds MAP_FAILED while no mapping exists.
 */
struct spdk_iscsi_conn *g_conns_array = MAP_FAILED;
/* File descriptor of the shared-memory object backing g_conns_array; -1 when closed. */
static int g_conns_array_fd = -1;
/* Name of the shared-memory object ("/spdk_iscsi_conns.<shm id>"). */
static char g_shm_name[64];

/* Taken by the functions below that scan, claim or release slots of g_conns_array. */
static pthread_mutex_t g_conns_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Poller that waits for all connections to go away during shutdown. */
static struct spdk_poller *g_shutdown_timer = NULL;

static uint32_t iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask);

static void iscsi_conn_full_feature_migrate(void *arg1, void *arg2);
static void iscsi_conn_stop(struct spdk_iscsi_conn *conn);
static void iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group,
			       struct spdk_sock *sock);
82 
83 static struct spdk_iscsi_conn *
84 allocate_conn(void)
85 {
86 	struct spdk_iscsi_conn	*conn;
87 	int				i;
88 
89 	pthread_mutex_lock(&g_conns_mutex);
90 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
91 		conn = &g_conns_array[i];
92 		if (!conn->is_valid) {
93 			SPDK_ISCSI_CONNECTION_MEMSET(conn);
94 			conn->is_valid = 1;
95 			pthread_mutex_unlock(&g_conns_mutex);
96 			return conn;
97 		}
98 	}
99 	pthread_mutex_unlock(&g_conns_mutex);
100 
101 	return NULL;
102 }
103 
104 static void
105 free_conn(struct spdk_iscsi_conn *conn)
106 {
107 	free(conn->portal_host);
108 	free(conn->portal_port);
109 	conn->is_valid = 0;
110 }
111 
112 static struct spdk_iscsi_conn *
113 find_iscsi_connection_by_id(int cid)
114 {
115 	if (g_conns_array != MAP_FAILED && g_conns_array[cid].is_valid == 1) {
116 		return &g_conns_array[cid];
117 	} else {
118 		return NULL;
119 	}
120 }
121 
122 int spdk_initialize_iscsi_conns(void)
123 {
124 	size_t conns_size = sizeof(struct spdk_iscsi_conn) * MAX_ISCSI_CONNECTIONS;
125 	uint32_t i, last_core;
126 
127 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init\n");
128 
129 	snprintf(g_shm_name, sizeof(g_shm_name), "/spdk_iscsi_conns.%d", spdk_app_get_shm_id());
130 	g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600);
131 	if (g_conns_array_fd < 0) {
132 		SPDK_ERRLOG("could not shm_open %s\n", g_shm_name);
133 		goto err;
134 	}
135 
136 	if (ftruncate(g_conns_array_fd, conns_size) != 0) {
137 		SPDK_ERRLOG("could not ftruncate\n");
138 		goto err;
139 	}
140 	g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED,
141 			     g_conns_array_fd, 0);
142 
143 	if (g_conns_array == MAP_FAILED) {
144 		fprintf(stderr, "could not mmap cons array file %s (%d)\n", g_shm_name, errno);
145 		goto err;
146 	}
147 
148 	memset(g_conns_array, 0, conns_size);
149 
150 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
151 		g_conns_array[i].id = i;
152 	}
153 
154 	last_core = spdk_env_get_last_core();
155 	g_num_connections = calloc(last_core + 1, sizeof(uint32_t));
156 	if (!g_num_connections) {
157 		SPDK_ERRLOG("Could not allocate array size=%u for g_num_connections\n",
158 			    last_core + 1);
159 		goto err;
160 	}
161 
162 	return 0;
163 
164 err:
165 	if (g_conns_array != MAP_FAILED) {
166 		munmap(g_conns_array, conns_size);
167 		g_conns_array = MAP_FAILED;
168 	}
169 
170 	if (g_conns_array_fd >= 0) {
171 		close(g_conns_array_fd);
172 		g_conns_array_fd = -1;
173 		shm_unlink(g_shm_name);
174 	}
175 
176 	return -1;
177 }
178 
179 static void
180 iscsi_poll_group_add_conn_sock(struct spdk_iscsi_conn *conn)
181 {
182 	struct spdk_iscsi_poll_group *poll_group;
183 	int rc;
184 
185 	assert(conn->lcore == spdk_env_get_current_core());
186 
187 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
188 
189 	rc = spdk_sock_group_add_sock(poll_group->sock_group, conn->sock, iscsi_conn_sock_cb, conn);
190 	if (rc < 0) {
191 		SPDK_ERRLOG("Failed to add sock=%p of conn=%p\n", conn->sock, conn);
192 	}
193 }
194 
195 static void
196 iscsi_poll_group_remove_conn_sock(struct spdk_iscsi_conn *conn)
197 {
198 	struct spdk_iscsi_poll_group *poll_group;
199 	int rc;
200 
201 	assert(conn->lcore == spdk_env_get_current_core());
202 
203 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
204 
205 	rc = spdk_sock_group_remove_sock(poll_group->sock_group, conn->sock);
206 	if (rc < 0) {
207 		SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn);
208 	}
209 }
210 
211 static void
212 iscsi_poll_group_add_conn(struct spdk_iscsi_conn *conn)
213 {
214 	struct spdk_iscsi_poll_group *poll_group;
215 
216 	assert(conn->lcore == spdk_env_get_current_core());
217 
218 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
219 
220 	conn->is_stopped = false;
221 	STAILQ_INSERT_TAIL(&poll_group->connections, conn, link);
222 	iscsi_poll_group_add_conn_sock(conn);
223 }
224 
225 static void
226 iscsi_poll_group_remove_conn(struct spdk_iscsi_conn *conn)
227 {
228 	struct spdk_iscsi_poll_group *poll_group;
229 
230 	assert(conn->lcore == spdk_env_get_current_core());
231 
232 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
233 
234 	conn->is_stopped = true;
235 	STAILQ_REMOVE(&poll_group->connections, conn, spdk_iscsi_conn, link);
236 }
237 
238 /**
239  * \brief Create an iSCSI connection from the given parameters and schedule it
240  *        on a reactor.
241  *
242  * \code
243  *
244  * # identify reactor where the new connections work item will be scheduled
245  * reactor = spdk_iscsi_conn_allocate_reactor()
246  * allocate spdk_iscsi_conn object
247  * initialize spdk_iscsi_conn object
248  * schedule iSCSI connection work item on reactor
249  *
250  * \endcode
251  */
252 int
253 spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal,
254 			  struct spdk_sock *sock)
255 {
256 	struct spdk_iscsi_conn *conn;
257 	int bufsize, i, rc;
258 
259 	conn = allocate_conn();
260 	if (conn == NULL) {
261 		SPDK_ERRLOG("Could not allocate connection.\n");
262 		return -1;
263 	}
264 
265 	pthread_mutex_lock(&g_spdk_iscsi.mutex);
266 	conn->timeout = g_spdk_iscsi.timeout;
267 	conn->nopininterval = g_spdk_iscsi.nopininterval;
268 	conn->nopininterval *= spdk_get_ticks_hz(); /* seconds to TSC */
269 	conn->nop_outstanding = false;
270 	conn->data_out_cnt = 0;
271 	conn->data_in_cnt = 0;
272 	pthread_mutex_unlock(&g_spdk_iscsi.mutex);
273 	conn->MaxRecvDataSegmentLength = 8192; /* RFC3720(12.12) */
274 
275 	conn->portal = portal;
276 	conn->pg_tag = portal->group->tag;
277 	conn->portal_host = strdup(portal->host);
278 	conn->portal_port = strdup(portal->port);
279 	conn->portal_cpumask = portal->cpumask;
280 	conn->sock = sock;
281 
282 	conn->state = ISCSI_CONN_STATE_INVALID;
283 	conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE;
284 	conn->ttt = 0;
285 
286 	conn->partial_text_parameter = NULL;
287 
288 	for (i = 0; i < MAX_CONNECTION_PARAMS; i++) {
289 		conn->conn_param_state_negotiated[i] = false;
290 	}
291 
292 	for (i = 0; i < MAX_SESSION_PARAMS; i++) {
293 		conn->sess_param_state_negotiated[i] = false;
294 	}
295 
296 	for (i = 0; i < DEFAULT_MAXR2T; i++) {
297 		conn->outstanding_r2t_tasks[i] = NULL;
298 	}
299 
300 	TAILQ_INIT(&conn->write_pdu_list);
301 	TAILQ_INIT(&conn->snack_pdu_list);
302 	TAILQ_INIT(&conn->queued_r2t_tasks);
303 	TAILQ_INIT(&conn->active_r2t_tasks);
304 	TAILQ_INIT(&conn->queued_datain_tasks);
305 	memset(&conn->open_lun_descs, 0, sizeof(conn->open_lun_descs));
306 
307 	rc = spdk_sock_getaddr(sock, conn->target_addr, sizeof conn->target_addr, NULL,
308 			       conn->initiator_addr, sizeof conn->initiator_addr, NULL);
309 	if (rc < 0) {
310 		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
311 		goto error_return;
312 	}
313 
314 	bufsize = 2 * 1024 * 1024;
315 	rc = spdk_sock_set_recvbuf(conn->sock, bufsize);
316 	if (rc != 0) {
317 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
318 	}
319 
320 	bufsize = 32 * 1024 * 1024 / g_spdk_iscsi.MaxConnections;
321 	if (bufsize > 2 * 1024 * 1024) {
322 		bufsize = 2 * 1024 * 1024;
323 	}
324 	rc = spdk_sock_set_sendbuf(conn->sock, bufsize);
325 	if (rc != 0) {
326 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
327 	}
328 
329 	/* set low water mark */
330 	rc = spdk_sock_set_recvlowat(conn->sock, 1);
331 	if (rc != 0) {
332 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
333 		goto error_return;
334 	}
335 
336 	/* set default params */
337 	rc = spdk_iscsi_conn_params_init(&conn->params);
338 	if (rc < 0) {
339 		SPDK_ERRLOG("iscsi_conn_params_init() failed\n");
340 		goto error_return;
341 	}
342 	conn->logout_timer = NULL;
343 	conn->shutdown_timer = NULL;
344 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Launching connection on acceptor thread\n");
345 	conn->pending_task_cnt = 0;
346 
347 	conn->lcore = spdk_env_get_current_core();
348 	__sync_fetch_and_add(&g_num_connections[conn->lcore], 1);
349 
350 	iscsi_poll_group_add_conn(conn);
351 	return 0;
352 
353 error_return:
354 	spdk_iscsi_param_free(conn->params);
355 	free_conn(conn);
356 	return -1;
357 }
358 
359 void
360 spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
361 {
362 	if (pdu->task) {
363 		if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) {
364 			if (pdu->task->scsi.offset > 0) {
365 				conn->data_in_cnt--;
366 				if (pdu->bhs.flags & ISCSI_DATAIN_STATUS) {
367 					/* Free the primary task after the last subtask done */
368 					conn->data_in_cnt--;
369 					spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
370 				}
371 			}
372 		} else if (pdu->bhs.opcode == ISCSI_OP_SCSI_RSP &&
373 			   pdu->task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
374 			if (pdu->task->scsi.offset > 0) {
375 				spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
376 			}
377 		}
378 		spdk_iscsi_task_put(pdu->task);
379 	}
380 	spdk_put_pdu(pdu);
381 }
382 
383 static int
384 iscsi_conn_free_tasks(struct spdk_iscsi_conn *conn)
385 {
386 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
387 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
388 
389 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
390 		TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
391 		spdk_iscsi_conn_free_pdu(conn, pdu);
392 	}
393 
394 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
395 		TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
396 		if (pdu->task) {
397 			spdk_iscsi_task_put(pdu->task);
398 		}
399 		spdk_put_pdu(pdu);
400 	}
401 
402 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
403 		if (!iscsi_task->is_queued) {
404 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
405 			spdk_iscsi_task_put(iscsi_task);
406 		}
407 	}
408 
409 	if (conn->pending_task_cnt) {
410 		return -1;
411 	}
412 
413 	return 0;
414 }
415 
416 static void
417 _iscsi_conn_free(struct spdk_iscsi_conn *conn)
418 {
419 	if (conn == NULL) {
420 		return;
421 	}
422 
423 	spdk_iscsi_param_free(conn->params);
424 
425 	/*
426 	 * Each connection pre-allocates its next PDU - make sure these get
427 	 *  freed here.
428 	 */
429 	spdk_put_pdu(conn->pdu_in_progress);
430 
431 	free_conn(conn);
432 }
433 
434 static void
435 iscsi_conn_cleanup_backend(struct spdk_iscsi_conn *conn)
436 {
437 	int rc;
438 	struct spdk_iscsi_tgt_node *target;
439 
440 	if (conn->sess->connections > 1) {
441 		/* connection specific cleanup */
442 	} else if (!g_spdk_iscsi.AllowDuplicateIsid) {
443 		/* clean up all tasks to all LUNs for session */
444 		target = conn->sess->target;
445 		if (target != NULL) {
446 			rc = spdk_iscsi_tgt_node_cleanup_luns(conn, target);
447 			if (rc < 0) {
448 				SPDK_ERRLOG("target abort failed\n");
449 			}
450 		}
451 	}
452 }
453 
454 static void
455 iscsi_conn_free(struct spdk_iscsi_conn *conn)
456 {
457 	struct spdk_iscsi_sess *sess;
458 	int idx;
459 	uint32_t i;
460 
461 	pthread_mutex_lock(&g_conns_mutex);
462 
463 	if (conn->sess == NULL) {
464 		goto end;
465 	}
466 
467 	idx = -1;
468 	sess = conn->sess;
469 	conn->sess = NULL;
470 
471 	for (i = 0; i < sess->connections; i++) {
472 		if (sess->conns[i] == conn) {
473 			idx = i;
474 			break;
475 		}
476 	}
477 
478 	if (idx < 0) {
479 		SPDK_ERRLOG("remove conn not found\n");
480 	} else {
481 		for (i = idx; i < sess->connections - 1; i++) {
482 			sess->conns[i] = sess->conns[i + 1];
483 		}
484 		sess->conns[sess->connections - 1] = NULL;
485 		sess->connections--;
486 
487 		if (sess->connections == 0) {
488 			/* cleanup last connection */
489 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI,
490 				      "cleanup last conn free sess\n");
491 			spdk_free_sess(sess);
492 		}
493 	}
494 
495 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Terminating connections(tsih %d): %d\n",
496 		      sess->tsih, sess->connections);
497 
498 end:
499 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "cleanup free conn\n");
500 	_iscsi_conn_free(conn);
501 
502 	pthread_mutex_unlock(&g_conns_mutex);
503 }
504 
505 static int
506 _iscsi_conn_check_shutdown(void *arg)
507 {
508 	struct spdk_iscsi_conn *conn = arg;
509 	int rc;
510 
511 	rc = iscsi_conn_free_tasks(conn);
512 	if (rc < 0) {
513 		return 1;
514 	}
515 
516 	spdk_poller_unregister(&conn->shutdown_timer);
517 
518 	iscsi_conn_stop(conn);
519 	iscsi_conn_free(conn);
520 
521 	return 1;
522 }
523 
524 static void
525 _iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
526 {
527 	int rc;
528 
529 	spdk_clear_all_transfer_task(conn, NULL, NULL);
530 	iscsi_poll_group_remove_conn_sock(conn);
531 	spdk_sock_close(&conn->sock);
532 	spdk_poller_unregister(&conn->logout_timer);
533 	spdk_poller_unregister(&conn->flush_poller);
534 
535 	rc = iscsi_conn_free_tasks(conn);
536 	if (rc < 0) {
537 		/* The connection cannot be freed yet. Check back later. */
538 		conn->shutdown_timer = spdk_poller_register(_iscsi_conn_check_shutdown, conn, 1000);
539 	} else {
540 		iscsi_conn_stop(conn);
541 		iscsi_conn_free(conn);
542 	}
543 }
544 
545 static int
546 _iscsi_conn_check_pending_tasks(void *arg)
547 {
548 	struct spdk_iscsi_conn *conn = arg;
549 
550 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
551 		return 1;
552 	}
553 
554 	spdk_poller_unregister(&conn->shutdown_timer);
555 
556 	_iscsi_conn_destruct(conn);
557 
558 	return 1;
559 }
560 
561 void
562 spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
563 {
564 	/* If a connection is already in exited status, just return */
565 	if (conn->state >= ISCSI_CONN_STATE_EXITED) {
566 		return;
567 	}
568 
569 	conn->state = ISCSI_CONN_STATE_EXITED;
570 
571 	if (conn->sess != NULL && conn->pending_task_cnt > 0) {
572 		iscsi_conn_cleanup_backend(conn);
573 	}
574 
575 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
576 		conn->shutdown_timer = spdk_poller_register(_iscsi_conn_check_pending_tasks, conn, 1000);
577 	} else {
578 		_iscsi_conn_destruct(conn);
579 	}
580 }
581 
582 int
583 spdk_iscsi_get_active_conns(struct spdk_iscsi_tgt_node *target)
584 {
585 	struct spdk_iscsi_conn *conn;
586 	int num = 0;
587 	int i;
588 
589 	pthread_mutex_lock(&g_conns_mutex);
590 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
591 		conn = find_iscsi_connection_by_id(i);
592 		if (conn == NULL) {
593 			continue;
594 		}
595 		if (target != NULL && conn->target != target) {
596 			continue;
597 		}
598 		num++;
599 	}
600 	pthread_mutex_unlock(&g_conns_mutex);
601 	return num;
602 }
603 
604 static void
605 iscsi_conns_cleanup(void)
606 {
607 	free(g_num_connections);
608 	munmap(g_conns_array, sizeof(struct spdk_iscsi_conn) *
609 	       MAX_ISCSI_CONNECTIONS);
610 	g_conns_array = MAP_FAILED;
611 	shm_unlink(g_shm_name);
612 	if (g_conns_array_fd >= 0) {
613 		close(g_conns_array_fd);
614 		g_conns_array_fd = -1;
615 	}
616 }
617 
/**
 * Event callback scheduled by iscsi_conn_check_shutdown(): releases the
 * global connection bookkeeping and then signals that connection shutdown
 * is done.  Neither argument is used.
 */
static void
iscsi_conn_check_shutdown_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	iscsi_conns_cleanup();
	spdk_shutdown_iscsi_conns_done();
}
624 
625 static int
626 iscsi_conn_check_shutdown(void *arg)
627 {
628 	struct spdk_event *event;
629 
630 	if (spdk_iscsi_get_active_conns(NULL) != 0) {
631 		return 1;
632 	}
633 
634 	spdk_poller_unregister(&g_shutdown_timer);
635 	event = spdk_event_allocate(spdk_env_get_current_core(),
636 				    iscsi_conn_check_shutdown_cb, NULL, NULL);
637 	spdk_event_call(event);
638 
639 	return 1;
640 }
641 
642 static void
643 iscsi_conn_close_lun(struct spdk_iscsi_conn *conn, int lun_id)
644 {
645 	struct spdk_scsi_lun_desc *desc;
646 
647 	desc = conn->open_lun_descs[lun_id];
648 	if (desc != NULL) {
649 		spdk_scsi_lun_free_io_channel(desc);
650 		spdk_scsi_lun_close(desc);
651 		conn->open_lun_descs[lun_id] = NULL;
652 	}
653 }
654 
655 static void
656 iscsi_conn_close_luns(struct spdk_iscsi_conn *conn)
657 {
658 	int i;
659 
660 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
661 		iscsi_conn_close_lun(conn, i);
662 	}
663 }
664 
665 static void
666 _iscsi_conn_remove_lun(void *arg1, void *arg2)
667 {
668 	struct spdk_iscsi_conn *conn = arg1;
669 	struct spdk_scsi_lun *lun = arg2;
670 	int lun_id = spdk_scsi_lun_get_id(lun);
671 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
672 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
673 
674 	/* If a connection is already in stating status, just return */
675 	if (conn->state >= ISCSI_CONN_STATE_EXITING) {
676 		return;
677 	}
678 
679 	spdk_clear_all_transfer_task(conn, lun, NULL);
680 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
681 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
682 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
683 			spdk_iscsi_conn_free_pdu(conn, pdu);
684 		}
685 	}
686 
687 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
688 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
689 			TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
690 			spdk_iscsi_task_put(pdu->task);
691 			spdk_put_pdu(pdu);
692 		}
693 	}
694 
695 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
696 		if ((!iscsi_task->is_queued) && (lun == iscsi_task->scsi.lun)) {
697 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
698 			spdk_iscsi_task_put(iscsi_task);
699 		}
700 	}
701 
702 	iscsi_conn_close_lun(conn, lun_id);
703 }
704 
705 static void
706 iscsi_conn_remove_lun(struct spdk_scsi_lun *lun, void *remove_ctx)
707 {
708 	struct spdk_iscsi_conn *conn = remove_ctx;
709 	struct spdk_event *event;
710 
711 	event = spdk_event_allocate(conn->lcore, _iscsi_conn_remove_lun,
712 				    conn, lun);
713 	spdk_event_call(event);
714 }
715 
716 static void
717 iscsi_conn_open_luns(struct spdk_iscsi_conn *conn)
718 {
719 	int i, rc;
720 	struct spdk_scsi_lun *lun;
721 	struct spdk_scsi_lun_desc *desc;
722 
723 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
724 		lun = spdk_scsi_dev_get_lun(conn->dev, i);
725 		if (lun == NULL) {
726 			continue;
727 		}
728 
729 		rc = spdk_scsi_lun_open(lun, iscsi_conn_remove_lun, conn, &desc);
730 		if (rc != 0) {
731 			goto error;
732 		}
733 
734 		rc = spdk_scsi_lun_allocate_io_channel(desc);
735 		if (rc != 0) {
736 			spdk_scsi_lun_close(desc);
737 			goto error;
738 		}
739 
740 		conn->open_lun_descs[i] = desc;
741 	}
742 
743 	return;
744 
745 error:
746 	iscsi_conn_close_luns(conn);
747 }
748 
749 /**
750  *  This function will stop executing the specified connection.
751  */
752 static void
753 iscsi_conn_stop(struct spdk_iscsi_conn *conn)
754 {
755 	struct spdk_iscsi_tgt_node *target;
756 
757 	if (conn->state == ISCSI_CONN_STATE_EXITED && conn->sess != NULL &&
758 	    conn->sess->session_type == SESSION_TYPE_NORMAL &&
759 	    conn->full_feature) {
760 		target = conn->sess->target;
761 		pthread_mutex_lock(&target->mutex);
762 		target->num_active_conns--;
763 		pthread_mutex_unlock(&target->mutex);
764 
765 		iscsi_conn_close_luns(conn);
766 	}
767 
768 	assert(conn->lcore == spdk_env_get_current_core());
769 
770 	__sync_fetch_and_sub(&g_num_connections[conn->lcore], 1);
771 	iscsi_poll_group_remove_conn(conn);
772 }
773 
774 void
775 spdk_iscsi_conns_start_exit(struct spdk_iscsi_tgt_node *target)
776 {
777 	struct spdk_iscsi_conn	*conn;
778 	int			i;
779 
780 	pthread_mutex_lock(&g_conns_mutex);
781 
782 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
783 		conn = find_iscsi_connection_by_id(i);
784 		if (conn == NULL) {
785 			continue;
786 		}
787 
788 		if (target != NULL && conn->target != target) {
789 			continue;
790 		}
791 
792 		/* Do not set conn->state if the connection has already started exiting.
793 		  * This ensures we do not move a connection from EXITED state back to EXITING.
794 		  */
795 		if (conn->state < ISCSI_CONN_STATE_EXITING) {
796 			conn->state = ISCSI_CONN_STATE_EXITING;
797 		}
798 	}
799 
800 	pthread_mutex_unlock(&g_conns_mutex);
801 }
802 
803 void
804 spdk_shutdown_iscsi_conns(void)
805 {
806 	spdk_iscsi_conns_start_exit(NULL);
807 
808 	g_shutdown_timer = spdk_poller_register(iscsi_conn_check_shutdown, NULL, 1000);
809 }
810 
811 int
812 spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, const char *conn_match,
813 		      int drop_all)
814 {
815 	struct spdk_iscsi_conn	*xconn;
816 	const char			*xconn_match;
817 	int				i, num;
818 
819 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_drop_conns\n");
820 
821 	num = 0;
822 	pthread_mutex_lock(&g_conns_mutex);
823 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
824 		xconn = find_iscsi_connection_by_id(i);
825 
826 		if (xconn == NULL) {
827 			continue;
828 		}
829 
830 		if (xconn == conn) {
831 			continue;
832 		}
833 
834 		if (!drop_all && xconn->initiator_port == NULL) {
835 			continue;
836 		}
837 
838 		xconn_match =
839 			drop_all ? xconn->initiator_name : spdk_scsi_port_get_name(xconn->initiator_port);
840 
841 		if (!strcasecmp(conn_match, xconn_match) &&
842 		    conn->target == xconn->target) {
843 
844 			if (num == 0) {
845 				/*
846 				 * Only print this message before we report the
847 				 *  first dropped connection.
848 				 */
849 				SPDK_ERRLOG("drop old connections %s by %s\n",
850 					    conn->target->name, conn_match);
851 			}
852 
853 			SPDK_ERRLOG("exiting conn by %s (%s)\n",
854 				    xconn_match, xconn->initiator_addr);
855 			if (xconn->sess != NULL) {
856 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=%u\n", xconn->sess->tsih);
857 			} else {
858 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=xx\n");
859 			}
860 
861 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CID=%u\n", xconn->cid);
862 
863 			/* Do not set xconn->state if the connection has already started exiting.
864 			  * This ensures we do not move a connection from EXITED state back to EXITING.
865 			  */
866 			if (xconn->state < ISCSI_CONN_STATE_EXITING) {
867 				xconn->state = ISCSI_CONN_STATE_EXITING;
868 			}
869 			num++;
870 		}
871 	}
872 
873 	pthread_mutex_unlock(&g_conns_mutex);
874 
875 	if (num != 0) {
876 		SPDK_ERRLOG("exiting %d conns\n", num);
877 	}
878 
879 	return 0;
880 }
881 
882 /**
883  * \brief Reads data for the specified iSCSI connection from its TCP socket.
884  *
885  * The TCP socket is marked as non-blocking, so this function may not read
886  * all data requested.
887  *
888  * Returns SPDK_ISCSI_CONNECTION_FATAL if the recv() operation indicates a fatal
889  * error with the TCP connection (including if the TCP connection was closed
890  * unexpectedly.
891  *
892  * Otherwise returns the number of bytes successfully read.
893  */
894 int
895 spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int bytes,
896 			  void *buf)
897 {
898 	int ret;
899 
900 	if (bytes == 0) {
901 		return 0;
902 	}
903 
904 	ret = spdk_sock_recv(conn->sock, buf, bytes);
905 
906 	if (ret > 0) {
907 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
908 		return ret;
909 	}
910 
911 	if (ret < 0) {
912 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
913 			return 0;
914 		}
915 
916 		/* For connect reset issue, do not output error log */
917 		if (errno == ECONNRESET) {
918 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_recv() failed, errno %d: %s\n",
919 				      errno, spdk_strerror(errno));
920 		} else {
921 			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
922 				    errno, spdk_strerror(errno));
923 		}
924 	}
925 
926 	/* connection closed */
927 	return SPDK_ISCSI_CONNECTION_FATAL;
928 }
929 
930 int
931 spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn,
932 			   struct iovec *iov, int iovcnt)
933 {
934 	int ret;
935 
936 	if (iov == NULL || iovcnt == 0) {
937 		return 0;
938 	}
939 
940 	if (iovcnt == 1) {
941 		return spdk_iscsi_conn_read_data(conn, iov[0].iov_len,
942 						 iov[0].iov_base);
943 	}
944 
945 	ret = spdk_sock_readv(conn->sock, iov, iovcnt);
946 
947 	if (ret > 0) {
948 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
949 		return ret;
950 	}
951 
952 	if (ret < 0) {
953 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
954 			return 0;
955 		}
956 
957 		/* For connect reset issue, do not output error log */
958 		if (errno == ECONNRESET) {
959 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_readv() failed, errno %d: %s\n",
960 				      errno, spdk_strerror(errno));
961 		} else {
962 			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
963 				    errno, spdk_strerror(errno));
964 		}
965 	}
966 
967 	/* connection closed */
968 	return SPDK_ISCSI_CONNECTION_FATAL;
969 }
970 
971 void
972 spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task)
973 {
974 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
975 
976 	spdk_iscsi_task_mgmt_response(task->conn, task);
977 	spdk_iscsi_task_put(task);
978 }
979 
980 static void
981 iscsi_task_copy_to_rsp_scsi_status(struct spdk_iscsi_task *primary,
982 				   struct spdk_scsi_task *task)
983 {
984 	memcpy(primary->rsp_sense_data, task->sense_data, task->sense_data_len);
985 	primary->rsp_sense_data_len = task->sense_data_len;
986 	primary->rsp_scsi_status = task->status;
987 }
988 
989 static void
990 iscsi_task_copy_from_rsp_scsi_status(struct spdk_scsi_task *task,
991 				     struct spdk_iscsi_task *primary)
992 {
993 	memcpy(task->sense_data, primary->rsp_sense_data,
994 	       primary->rsp_sense_data_len);
995 	task->sense_data_len = primary->rsp_sense_data_len;
996 	task->status = primary->rsp_scsi_status;
997 }
998 
999 static void
1000 process_completed_read_subtask_list(struct spdk_iscsi_conn *conn,
1001 				    struct spdk_iscsi_task *primary)
1002 {
1003 	struct spdk_iscsi_task *subtask, *tmp;
1004 
1005 	TAILQ_FOREACH_SAFE(subtask, &primary->subtask_list, subtask_link, tmp) {
1006 		if (subtask->scsi.offset == primary->bytes_completed) {
1007 			TAILQ_REMOVE(&primary->subtask_list, subtask, subtask_link);
1008 			primary->bytes_completed += subtask->scsi.length;
1009 			spdk_iscsi_task_response(conn, subtask);
1010 			spdk_iscsi_task_put(subtask);
1011 		} else {
1012 			break;
1013 		}
1014 	}
1015 }
1016 
/**
 * Handle the completion of one task that belongs to a read command.
 *
 * \param conn    connection passed on to the response helpers.
 * \param task    the task that just completed (may be the primary itself).
 * \param primary the primary task of \p task.
 *
 * A subtask whose offset does not equal primary->bytes_completed is parked
 * on primary->subtask_list, which is kept sorted by ascending offset, and no
 * response is sent for it here.  Otherwise the response is sent right away,
 * followed by any parked subtasks that have become contiguous, and the
 * queued Data-In tasks of the connection are handled.
 */
static void
process_read_task_completion(struct spdk_iscsi_conn *conn,
			     struct spdk_iscsi_task *task,
			     struct spdk_iscsi_task *primary)
{
	struct spdk_iscsi_task *tmp;

	/* If the completed subtask carries the first failure seen for this
	 * command, copy its status to the parked out-of-order subtasks and
	 * remember it on the primary as the status of the command.
	 *
	 * If the completed task succeeded but an earlier subtask has already
	 * failed, copy the remembered first failure status into it.
	 */
	if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
		if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
			TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
				spdk_scsi_task_copy_status(&tmp->scsi, &task->scsi);
			}
			iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
		}
	} else if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
		iscsi_task_copy_from_rsp_scsi_status(&task->scsi, primary);
	}

	/* Out-of-order subtask: park it in offset order and answer it later. */
	if ((task != primary) &&
	    (task->scsi.offset != primary->bytes_completed)) {
		TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
			if (task->scsi.offset < tmp->scsi.offset) {
				TAILQ_INSERT_BEFORE(tmp, task, subtask_link);
				return;
			}
		}

		TAILQ_INSERT_TAIL(&primary->subtask_list, task, subtask_link);
		return;
	}

	/* In-order completion: account for its bytes and send the response now. */
	primary->bytes_completed += task->scsi.length;
	spdk_iscsi_task_response(conn, task);

	/* The reference is dropped here for a subtask, and for a primary whose
	 * own length equals the whole transfer length.
	 */
	if ((task != primary) ||
	    (task->scsi.transfer_len == task->scsi.length)) {
		spdk_iscsi_task_put(task);
	}
	/* Answer any parked subtasks that are now contiguous. */
	process_completed_read_subtask_list(conn, primary);

	spdk_iscsi_conn_handle_queued_datain_tasks(conn);
}
1067 
1068 void
1069 spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task)
1070 {
1071 	struct spdk_iscsi_task *primary;
1072 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
1073 	struct spdk_iscsi_conn *conn = task->conn;
1074 	struct spdk_iscsi_pdu *pdu = task->pdu;
1075 
1076 	spdk_trace_record(TRACE_ISCSI_TASK_DONE, conn->id, 0, (uintptr_t)task, 0);
1077 
1078 	task->is_queued = false;
1079 	primary = spdk_iscsi_task_get_primary(task);
1080 
1081 	if (spdk_iscsi_task_is_read(primary)) {
1082 		process_read_task_completion(conn, task, primary);
1083 	} else {
1084 		primary->bytes_completed += task->scsi.length;
1085 
1086 		/* If the status of the subtask is the first failure, remember it as
1087 		 * the status of the command and set it to the status of the primary
1088 		 * task later.
1089 		 *
1090 		 * If the first failed task is the primary, two copies can be avoided
1091 		 * but code simplicity is prioritized.
1092 		 */
1093 		if (task->scsi.status == SPDK_SCSI_STATUS_GOOD) {
1094 			if (task != primary) {
1095 				primary->scsi.data_transferred += task->scsi.data_transferred;
1096 			}
1097 		} else if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1098 			iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1099 		}
1100 
1101 		if (primary->bytes_completed == primary->scsi.transfer_len) {
1102 			spdk_del_transfer_task(conn, primary->tag);
1103 			if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1104 				iscsi_task_copy_from_rsp_scsi_status(&primary->scsi, primary);
1105 			}
1106 			spdk_iscsi_task_response(conn, primary);
1107 			/*
1108 			 * Check if this is the last task completed for an iSCSI write
1109 			 *  that required child subtasks.  If task != primary, we know
1110 			 *  for sure that it was part of an iSCSI write with child subtasks.
1111 			 *  The trickier case is when the last task completed was the initial
1112 			 *  task - in this case the task will have a smaller length than
1113 			 *  the overall transfer length.
1114 			 */
1115 			if (task != primary || task->scsi.length != task->scsi.transfer_len) {
1116 				TAILQ_REMOVE(&conn->active_r2t_tasks, primary, link);
1117 				spdk_iscsi_task_put(primary);
1118 			}
1119 		}
1120 		spdk_iscsi_task_put(task);
1121 	}
1122 	if (!task->parent) {
1123 		spdk_trace_record(TRACE_ISCSI_PDU_COMPLETED, 0, 0, (uintptr_t)pdu, 0);
1124 	}
1125 }
1126 
1127 static int
1128 iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest,
1129 		     int data_digest)
1130 {
1131 	int data_len, enable_digest, total;
1132 
1133 	enable_digest = 1;
1134 	if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) {
1135 		enable_digest = 0;
1136 	}
1137 
1138 	total = ISCSI_BHS_LEN;
1139 
1140 	total += (4 * pdu->bhs.total_ahs_len);
1141 
1142 	if (enable_digest && header_digest) {
1143 		total += ISCSI_DIGEST_LEN;
1144 	}
1145 
1146 	data_len = DGET24(pdu->bhs.data_segment_len);
1147 	if (data_len > 0) {
1148 		total += ISCSI_ALIGN(data_len);
1149 		if (enable_digest && data_digest) {
1150 			total += ISCSI_DIGEST_LEN;
1151 		}
1152 	}
1153 
1154 	return total;
1155 }
1156 
1157 void
1158 spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn)
1159 {
1160 	uint64_t	tsc;
1161 
1162 	/**
1163 	  * This function will be executed by nop_poller of iSCSI polling group, so
1164 	  * we need to check the connection state first, then do the nop interval
1165 	  * expiration check work.
1166 	  */
1167 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1168 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1169 		return;
1170 	}
1171 
1172 	/* Check for nop interval expiration */
1173 	tsc = spdk_get_ticks();
1174 	if (conn->nop_outstanding) {
1175 		if ((tsc - conn->last_nopin) > (conn->timeout  * spdk_get_ticks_hz())) {
1176 			SPDK_ERRLOG("Timed out waiting for NOP-Out response from initiator\n");
1177 			SPDK_ERRLOG("  tsc=0x%lx, last_nopin=0x%lx\n", tsc, conn->last_nopin);
1178 			SPDK_ERRLOG("  initiator=%s, target=%s\n", conn->initiator_name,
1179 				    conn->target_short_name);
1180 			conn->state = ISCSI_CONN_STATE_EXITING;
1181 		}
1182 	} else if (tsc - conn->last_nopin > conn->nopininterval) {
1183 		spdk_iscsi_send_nopin(conn);
1184 	}
1185 }
1186 
1187 /**
1188  * \brief Makes one attempt to flush response PDUs back to the initiator.
1189  *
1190  * Builds a list of iovecs for response PDUs that must be sent back to the
1191  * initiator and passes it to writev().
1192  *
1193  * Since the socket is non-blocking, writev() may not be able to flush all
1194  * of the iovecs, and may even partially flush one of the iovecs.  In this
1195  * case, the partially flushed PDU will remain on the write_pdu_list with
1196  * an offset pointing to the next byte to be flushed.
1197  *
1198  * Returns 0 if all PDUs were flushed.
1199  *
1200  * Returns 1 if some PDUs could not be flushed due to lack of send buffer
1201  * space.
1202  *
1203  * Returns -1 if an exception error occurred indicating the TCP connection
1204  * should be closed.
1205  */
1206 static int
1207 iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn)
1208 {
1209 	const int num_iovs = 32;
1210 	struct iovec iovs[num_iovs];
1211 	struct iovec *iov = iovs;
1212 	int iovcnt = 0;
1213 	int bytes = 0;
1214 	uint32_t total_length = 0;
1215 	uint32_t mapped_length = 0;
1216 	struct spdk_iscsi_pdu *pdu;
1217 	int pdu_length;
1218 
1219 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1220 
1221 	if (pdu == NULL) {
1222 		return 0;
1223 	}
1224 
1225 	/*
1226 	 * Build up a list of iovecs for the first few PDUs in the
1227 	 *  connection's write_pdu_list. For the first PDU, check if it was
1228 	 *  partially written out the last time this function was called, and
1229 	 *  if so adjust the iovec array accordingly. This check is done in
1230 	 *  spdk_iscsi_build_iovs() and so applied to remaining PDUs too.
1231 	 *  But extra overhead is negligible.
1232 	 */
1233 	while (pdu != NULL && ((num_iovs - iovcnt) > 0)) {
1234 		iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt,
1235 						pdu, &mapped_length);
1236 		total_length += mapped_length;
1237 		pdu = TAILQ_NEXT(pdu, tailq);
1238 	}
1239 
1240 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt);
1241 
1242 	bytes = spdk_sock_writev(conn->sock, iov, iovcnt);
1243 	if (bytes == -1) {
1244 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
1245 			return 1;
1246 		} else {
1247 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
1248 				    errno, spdk_strerror(errno));
1249 			return -1;
1250 		}
1251 	}
1252 
1253 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0);
1254 
1255 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1256 
1257 	/*
1258 	 * Free any PDUs that were fully written.  If a PDU was only
1259 	 *  partially written, update its writev_offset so that next
1260 	 *  time only the unwritten portion will be sent to writev().
1261 	 */
1262 	while (bytes > 0) {
1263 		pdu_length = iscsi_get_pdu_length(pdu, conn->header_digest,
1264 						  conn->data_digest);
1265 		pdu_length -= pdu->writev_offset;
1266 
1267 		if (bytes >= pdu_length) {
1268 			bytes -= pdu_length;
1269 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
1270 
1271 			if ((conn->full_feature) &&
1272 			    (conn->sess->ErrorRecoveryLevel >= 1) &&
1273 			    spdk_iscsi_is_deferred_free_pdu(pdu)) {
1274 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n",
1275 					      from_be32(&pdu->bhs.stat_sn));
1276 				TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu,
1277 						  tailq);
1278 			} else {
1279 				spdk_iscsi_conn_free_pdu(conn, pdu);
1280 			}
1281 
1282 			pdu = TAILQ_FIRST(&conn->write_pdu_list);
1283 		} else {
1284 			pdu->writev_offset += bytes;
1285 			bytes = 0;
1286 		}
1287 	}
1288 
1289 	return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1;
1290 }
1291 
1292 /**
1293  * \brief Flushes response PDUs back to the initiator.
1294  *
1295  * This function may return without all PDUs having flushed to the
1296  * underlying TCP socket buffer - for example, in the case where the
1297  * socket buffer is already full.
1298  *
1299  * During normal RUNNING connection state, if not all PDUs are flushed,
1300  * then subsequent calls to this routine will eventually flush
1301  * remaining PDUs.
1302  *
1303  * During other connection states (EXITING or LOGGED_OUT), this
1304  * function will spin until all PDUs have successfully been flushed.
1305  */
1306 static int
1307 iscsi_conn_flush_pdus(void *_conn)
1308 {
1309 	struct spdk_iscsi_conn *conn = _conn;
1310 	int rc;
1311 
1312 	if (conn->state == ISCSI_CONN_STATE_RUNNING) {
1313 		rc = iscsi_conn_flush_pdus_internal(conn);
1314 		if (rc == 0 && conn->flush_poller != NULL) {
1315 			spdk_poller_unregister(&conn->flush_poller);
1316 		} else if (rc == 1 && conn->flush_poller == NULL) {
1317 			conn->flush_poller = spdk_poller_register(iscsi_conn_flush_pdus,
1318 					     conn, 50);
1319 		}
1320 	} else {
1321 		/*
1322 		 * If the connection state is not RUNNING, then
1323 		 * keep trying to flush PDUs until our list is
1324 		 * empty - to make sure all data is sent before
1325 		 * closing the connection.
1326 		 */
1327 		do {
1328 			rc = iscsi_conn_flush_pdus_internal(conn);
1329 		} while (rc == 1);
1330 	}
1331 
1332 	if (rc < 0 && conn->state < ISCSI_CONN_STATE_EXITING) {
1333 		/*
1334 		 * If the poller has already started destruction of the connection,
1335 		 *  i.e. the socket read failed, then the connection state may already
1336 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
1337 		 */
1338 		conn->state = ISCSI_CONN_STATE_EXITING;
1339 	}
1340 
1341 	return 1;
1342 }
1343 
1344 static int
1345 iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx)
1346 {
1347 	struct iovec iov;
1348 	struct spdk_dif_error err_blk = {};
1349 	uint32_t num_blocks;
1350 	int rc;
1351 
1352 	iov.iov_base = pdu->data;
1353 	iov.iov_len = pdu->data_buf_len;
1354 	num_blocks = pdu->data_buf_len / dif_ctx->block_size;
1355 
1356 	rc = spdk_dif_verify(&iov, 1, num_blocks, dif_ctx, &err_blk);
1357 	if (rc != 0) {
1358 		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
1359 			    err_blk.err_type, err_blk.err_offset);
1360 	}
1361 
1362 	return rc;
1363 }
1364 
1365 void
1366 spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
1367 {
1368 	uint32_t crc32c;
1369 	int rc;
1370 
1371 	if (spdk_unlikely(spdk_iscsi_get_dif_ctx(conn, pdu, &pdu->dif_ctx))) {
1372 		rc = iscsi_dif_verify(pdu, &pdu->dif_ctx);
1373 		if (rc != 0) {
1374 			spdk_iscsi_conn_free_pdu(conn, pdu);
1375 			conn->state = ISCSI_CONN_STATE_EXITING;
1376 			return;
1377 		}
1378 		pdu->dif_insert_or_strip = true;
1379 	}
1380 
1381 	if (pdu->bhs.opcode != ISCSI_OP_LOGIN_RSP) {
1382 		/* Header Digest */
1383 		if (conn->header_digest) {
1384 			crc32c = spdk_iscsi_pdu_calc_header_digest(pdu);
1385 			MAKE_DIGEST_WORD(pdu->header_digest, crc32c);
1386 		}
1387 
1388 		/* Data Digest */
1389 		if (conn->data_digest && DGET24(pdu->bhs.data_segment_len) != 0) {
1390 			crc32c = spdk_iscsi_pdu_calc_data_digest(pdu);
1391 			MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
1392 		}
1393 	}
1394 
1395 	TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq);
1396 	iscsi_conn_flush_pdus(conn);
1397 }
1398 
1399 #define GET_PDU_LOOP_COUNT	16
1400 
1401 static int
1402 iscsi_conn_handle_incoming_pdus(struct spdk_iscsi_conn *conn)
1403 {
1404 	struct spdk_iscsi_pdu *pdu;
1405 	int i, rc;
1406 
1407 	/* Read new PDUs from network */
1408 	for (i = 0; i < GET_PDU_LOOP_COUNT; i++) {
1409 		rc = spdk_iscsi_read_pdu(conn, &pdu);
1410 		if (rc == 0) {
1411 			break;
1412 		} else if (rc == SPDK_ISCSI_CONNECTION_FATAL) {
1413 			return rc;
1414 		}
1415 
1416 		if (conn->state == ISCSI_CONN_STATE_LOGGED_OUT) {
1417 			SPDK_ERRLOG("pdu received after logout\n");
1418 			spdk_put_pdu(pdu);
1419 			return SPDK_ISCSI_CONNECTION_FATAL;
1420 		}
1421 
1422 		rc = spdk_iscsi_execute(conn, pdu);
1423 		spdk_put_pdu(pdu);
1424 		if (rc != 0) {
1425 			SPDK_ERRLOG("spdk_iscsi_execute() fatal error on %s(%s)\n",
1426 				    conn->target_port != NULL ? spdk_scsi_port_get_name(conn->target_port) : "NULL",
1427 				    conn->initiator_port != NULL ? spdk_scsi_port_get_name(conn->initiator_port) : "NULL");
1428 			return rc;
1429 		}
1430 
1431 		spdk_trace_record(TRACE_ISCSI_TASK_EXECUTED, 0, 0, (uintptr_t)pdu, 0);
1432 		if (conn->is_stopped) {
1433 			break;
1434 		}
1435 	}
1436 
1437 	return i;
1438 }
1439 
1440 static void
1441 iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
1442 {
1443 	struct spdk_iscsi_conn *conn = arg;
1444 	int rc;
1445 
1446 	assert(conn != NULL);
1447 
1448 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1449 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1450 		return;
1451 	}
1452 
1453 	/* Handle incoming PDUs */
1454 	rc = iscsi_conn_handle_incoming_pdus(conn);
1455 	if (rc < 0) {
1456 		conn->state = ISCSI_CONN_STATE_EXITING;
1457 		iscsi_conn_flush_pdus(conn);
1458 	}
1459 }
1460 
1461 static void
1462 iscsi_conn_full_feature_migrate(void *arg1, void *arg2)
1463 {
1464 	struct spdk_iscsi_conn *conn = arg1;
1465 
1466 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1467 		iscsi_conn_open_luns(conn);
1468 	}
1469 
1470 	/* The poller has been unregistered, so now we can re-register it on the new core. */
1471 	conn->lcore = spdk_env_get_current_core();
1472 	iscsi_poll_group_add_conn(conn);
1473 }
1474 
1475 void
1476 spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn)
1477 {
1478 	int				lcore;
1479 	struct spdk_event		*event;
1480 	struct spdk_iscsi_tgt_node *target;
1481 
1482 	lcore = iscsi_conn_allocate_reactor(conn->portal->cpumask);
1483 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1484 		target = conn->sess->target;
1485 		pthread_mutex_lock(&target->mutex);
1486 		target->num_active_conns++;
1487 		if (target->num_active_conns == 1) {
1488 			/**
1489 			 * This is the only active connection for this target node.
1490 			 *  Save the lcore in the target node so it can be used for
1491 			 *  any other connections to this target node.
1492 			 */
1493 			target->lcore = lcore;
1494 		} else {
1495 			/**
1496 			 * There are other active connections for this target node.
1497 			 *  Ignore the lcore specified by the allocator and use the
1498 			 *  the target node's lcore to ensure this connection runs on
1499 			 *  the same lcore as other connections for this target node.
1500 			 */
1501 			lcore = target->lcore;
1502 		}
1503 		pthread_mutex_unlock(&target->mutex);
1504 	}
1505 
1506 	iscsi_poll_group_remove_conn_sock(conn);
1507 	spdk_poller_unregister(&conn->flush_poller);
1508 	iscsi_conn_stop(conn);
1509 
1510 	__sync_fetch_and_add(&g_num_connections[lcore], 1);
1511 	conn->last_nopin = spdk_get_ticks();
1512 	event = spdk_event_allocate(lcore, iscsi_conn_full_feature_migrate,
1513 				    conn, NULL);
1514 	spdk_event_call(event);
1515 }
1516 
1517 void
1518 spdk_iscsi_conn_set_min_per_core(int count)
1519 {
1520 	g_connections_per_lcore = count;
1521 }
1522 
1523 int
1524 spdk_iscsi_conn_get_min_per_core(void)
1525 {
1526 	return g_connections_per_lcore;
1527 }
1528 
1529 static uint32_t
1530 iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask)
1531 {
1532 	uint32_t i, selected_core;
1533 	int32_t num_pollers, min_pollers;
1534 
1535 	min_pollers = INT_MAX;
1536 	selected_core = spdk_env_get_first_core();
1537 
1538 	SPDK_ENV_FOREACH_CORE(i) {
1539 		if (!spdk_cpuset_get_cpu(cpumask, i)) {
1540 			continue;
1541 		}
1542 
1543 		/* This core is running. Check how many pollers it already has. */
1544 		num_pollers = g_num_connections[i];
1545 
1546 		if ((num_pollers > 0) && (num_pollers < g_connections_per_lcore)) {
1547 			/* Fewer than the maximum connections per core,
1548 			 * but at least 1. Use this core.
1549 			 */
1550 			return i;
1551 		} else if (num_pollers < min_pollers) {
1552 			/* Track the core that has the minimum number of pollers
1553 			 * to be used if no cores meet our criteria
1554 			 */
1555 			selected_core = i;
1556 			min_pollers = num_pollers;
1557 		}
1558 	}
1559 
1560 	return selected_core;
1561 }
1562 
/**
 * \brief Poller callback that destructs a connection.
 *
 * Registered by spdk_iscsi_conn_logout() as the logout timer.
 *
 * \param arg The connection (struct spdk_iscsi_conn *).
 *
 * \return Always -1.
 */
static int
logout_timeout(void *arg)
{
	struct spdk_iscsi_conn *logged_out_conn = arg;

	spdk_iscsi_conn_destruct(logged_out_conn);
	return -1;
}
1572 
1573 void
1574 spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn)
1575 {
1576 	conn->state = ISCSI_CONN_STATE_LOGGED_OUT;
1577 	conn->logout_timer = spdk_poller_register(logout_timeout, conn, ISCSI_LOGOUT_TIMEOUT * 1000000);
1578 }
1579 
1580 SPDK_TRACE_REGISTER_FN(iscsi_conn_trace, "iscsi_conn", TRACE_GROUP_ISCSI)
1581 {
1582 	spdk_trace_register_owner(OWNER_ISCSI_CONN, 'c');
1583 	spdk_trace_register_object(OBJECT_ISCSI_PDU, 'p');
1584 	spdk_trace_register_description("ISCSI_READ_FROM_SOCKET_DONE", "",
1585 					TRACE_ISCSI_READ_FROM_SOCKET_DONE,
1586 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1587 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_START", "", TRACE_ISCSI_FLUSH_WRITEBUF_START,
1588 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "iovec: ");
1589 	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_DONE", "", TRACE_ISCSI_FLUSH_WRITEBUF_DONE,
1590 					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
1591 	spdk_trace_register_description("ISCSI_READ_PDU", "", TRACE_ISCSI_READ_PDU,
1592 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 1, 0, "opc:   ");
1593 	spdk_trace_register_description("ISCSI_TASK_DONE", "", TRACE_ISCSI_TASK_DONE,
1594 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 0, 0, "");
1595 	spdk_trace_register_description("ISCSI_TASK_QUEUE", "", TRACE_ISCSI_TASK_QUEUE,
1596 					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 1, 1, "pdu:   ");
1597 	spdk_trace_register_description("ISCSI_TASK_EXECUTED", "", TRACE_ISCSI_TASK_EXECUTED,
1598 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1599 	spdk_trace_register_description("ISCSI_PDU_COMPLETED", "", TRACE_ISCSI_PDU_COMPLETED,
1600 					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
1601 }
1602