xref: /spdk/lib/iscsi/conn.c (revision 1fc4165fe9bf8512483356ad8e6d27f793f2e3db)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
5  *   Copyright (c) Intel Corporation.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/endian.h"
38 #include "spdk/env.h"
39 #include "spdk/event.h"
40 #include "spdk/thread.h"
41 #include "spdk/queue.h"
42 #include "spdk/trace.h"
43 #include "spdk/net.h"
44 #include "spdk/sock.h"
45 #include "spdk/string.h"
46 
47 #include "spdk_internal/log.h"
48 
49 #include "iscsi/task.h"
50 #include "iscsi/conn.h"
51 #include "iscsi/tgt_node.h"
52 #include "iscsi/portal_grp.h"
53 
/*
 * Store the 32-bit value CRC32C into the four bytes starting at BUF,
 * least-significant byte first.  The whole macro is a single comma
 * expression whose value is the last byte stored.
 */
#define MAKE_DIGEST_WORD(BUF, CRC32C) \
	(   (((uint8_t *)(BUF))[0] = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
	    (((uint8_t *)(BUF))[1] = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
	    (((uint8_t *)(BUF))[2] = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
	    (((uint8_t *)(BUF))[3] = (uint8_t)((uint32_t)(CRC32C) >> 24)))
59 
/*
 * Zero every member of *conn from `portal` through the end of the structure.
 * Members laid out before `portal` are left untouched.
 *
 * The expansion intentionally carries no trailing semicolon, so the macro
 * behaves like an ordinary expression statement and can be used safely in
 * an unbraced if/else.
 */
#define SPDK_ISCSI_CONNECTION_MEMSET(conn)		\
	memset(&(conn)->portal, 0, sizeof(*(conn)) -	\
		offsetof(struct spdk_iscsi_conn, portal))
63 
/* Per-core connection bookkeeping; g_num_connections is indexed by lcore. */
static int g_connections_per_lcore;
static uint32_t *g_num_connections;

/*
 * Table of connection slots.  It holds MAP_FAILED until
 * spdk_initialize_iscsi_conns() maps the shared-memory object named by
 * g_shm_name (descriptor kept in g_conns_array_fd).
 */
struct spdk_iscsi_conn *g_conns_array = MAP_FAILED;
static int g_conns_array_fd = -1;
static char g_shm_name[64];

/* Held while slots of g_conns_array are claimed or scanned. */
static pthread_mutex_t g_conns_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Poller registered by spdk_shutdown_iscsi_conns(). */
static struct spdk_poller *g_shutdown_timer;

/* Forward declarations of helpers defined later in this file. */
static uint32_t spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask);
static void spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2);
static void spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn);
static void spdk_iscsi_conn_sock_cb(void *arg,
				    struct spdk_sock_group *group,
				    struct spdk_sock *sock);
81 
82 static struct spdk_iscsi_conn *
83 allocate_conn(void)
84 {
85 	struct spdk_iscsi_conn	*conn;
86 	int				i;
87 
88 	pthread_mutex_lock(&g_conns_mutex);
89 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
90 		conn = &g_conns_array[i];
91 		if (!conn->is_valid) {
92 			SPDK_ISCSI_CONNECTION_MEMSET(conn);
93 			conn->is_valid = 1;
94 			pthread_mutex_unlock(&g_conns_mutex);
95 			return conn;
96 		}
97 	}
98 	pthread_mutex_unlock(&g_conns_mutex);
99 
100 	return NULL;
101 }
102 
103 static void
104 free_conn(struct spdk_iscsi_conn *conn)
105 {
106 	free(conn->portal_host);
107 	free(conn->portal_port);
108 	conn->is_valid = 0;
109 }
110 
111 static struct spdk_iscsi_conn *
112 spdk_find_iscsi_connection_by_id(int cid)
113 {
114 	if (g_conns_array[cid].is_valid == 1) {
115 		return &g_conns_array[cid];
116 	} else {
117 		return NULL;
118 	}
119 }
120 
121 int spdk_initialize_iscsi_conns(void)
122 {
123 	size_t conns_size = sizeof(struct spdk_iscsi_conn) * MAX_ISCSI_CONNECTIONS;
124 	uint32_t i, last_core;
125 
126 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init\n");
127 
128 	snprintf(g_shm_name, sizeof(g_shm_name), "/spdk_iscsi_conns.%d", spdk_app_get_shm_id());
129 	g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600);
130 	if (g_conns_array_fd < 0) {
131 		SPDK_ERRLOG("could not shm_open %s\n", g_shm_name);
132 		goto err;
133 	}
134 
135 	if (ftruncate(g_conns_array_fd, conns_size) != 0) {
136 		SPDK_ERRLOG("could not ftruncate\n");
137 		goto err;
138 	}
139 	g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED,
140 			     g_conns_array_fd, 0);
141 
142 	if (g_conns_array == MAP_FAILED) {
143 		fprintf(stderr, "could not mmap cons array file %s (%d)\n", g_shm_name, errno);
144 		goto err;
145 	}
146 
147 	memset(g_conns_array, 0, conns_size);
148 
149 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
150 		g_conns_array[i].id = i;
151 	}
152 
153 	last_core = spdk_env_get_last_core();
154 	g_num_connections = calloc(last_core + 1, sizeof(uint32_t));
155 	if (!g_num_connections) {
156 		SPDK_ERRLOG("Could not allocate array size=%u for g_num_connections\n",
157 			    last_core + 1);
158 		goto err;
159 	}
160 
161 	return 0;
162 
163 err:
164 	if (g_conns_array != MAP_FAILED) {
165 		munmap(g_conns_array, conns_size);
166 		g_conns_array = MAP_FAILED;
167 	}
168 
169 	if (g_conns_array_fd >= 0) {
170 		close(g_conns_array_fd);
171 		g_conns_array_fd = -1;
172 		shm_unlink(g_shm_name);
173 	}
174 
175 	return -1;
176 }
177 
178 static void
179 spdk_iscsi_poll_group_add_conn_sock(struct spdk_iscsi_conn *conn)
180 {
181 	struct spdk_iscsi_poll_group *poll_group;
182 	int rc;
183 
184 	assert(conn->lcore == spdk_env_get_current_core());
185 
186 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
187 
188 	rc = spdk_sock_group_add_sock(poll_group->sock_group, conn->sock, spdk_iscsi_conn_sock_cb, conn);
189 	if (rc < 0) {
190 		SPDK_ERRLOG("Failed to add sock=%p of conn=%p\n", conn->sock, conn);
191 	}
192 }
193 
194 static void
195 spdk_iscsi_poll_group_remove_conn_sock(struct spdk_iscsi_conn *conn)
196 {
197 	struct spdk_iscsi_poll_group *poll_group;
198 	int rc;
199 
200 	assert(conn->lcore == spdk_env_get_current_core());
201 
202 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
203 
204 	rc = spdk_sock_group_remove_sock(poll_group->sock_group, conn->sock);
205 	if (rc < 0) {
206 		SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn);
207 	}
208 }
209 
210 static void
211 spdk_iscsi_poll_group_add_conn(struct spdk_iscsi_conn *conn)
212 {
213 	struct spdk_iscsi_poll_group *poll_group;
214 
215 	assert(conn->lcore == spdk_env_get_current_core());
216 
217 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
218 
219 	conn->is_stopped = false;
220 	STAILQ_INSERT_TAIL(&poll_group->connections, conn, link);
221 	spdk_iscsi_poll_group_add_conn_sock(conn);
222 }
223 
224 static void
225 spdk_iscsi_poll_group_remove_conn(struct spdk_iscsi_conn *conn)
226 {
227 	struct spdk_iscsi_poll_group *poll_group;
228 
229 	assert(conn->lcore == spdk_env_get_current_core());
230 
231 	poll_group = &g_spdk_iscsi.poll_group[conn->lcore];
232 
233 	conn->is_stopped = true;
234 	STAILQ_REMOVE(&poll_group->connections, conn, spdk_iscsi_conn, link);
235 }
236 
237 /**
238  * \brief Create an iSCSI connection from the given parameters and schedule it
239  *        on a reactor.
240  *
241  * \code
242  *
243  * # identify reactor where the new connections work item will be scheduled
244  * reactor = spdk_iscsi_conn_allocate_reactor()
245  * allocate spdk_iscsi_conn object
246  * initialize spdk_iscsi_conn object
247  * schedule iSCSI connection work item on reactor
248  *
249  * \endcode
250  */
251 int
252 spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal,
253 			  struct spdk_sock *sock)
254 {
255 	struct spdk_iscsi_conn *conn;
256 	int bufsize, i, rc;
257 
258 	conn = allocate_conn();
259 	if (conn == NULL) {
260 		SPDK_ERRLOG("Could not allocate connection.\n");
261 		return -1;
262 	}
263 
264 	pthread_mutex_lock(&g_spdk_iscsi.mutex);
265 	conn->timeout = g_spdk_iscsi.timeout;
266 	conn->nopininterval = g_spdk_iscsi.nopininterval;
267 	conn->nopininterval *= spdk_get_ticks_hz(); /* seconds to TSC */
268 	conn->nop_outstanding = false;
269 	conn->data_out_cnt = 0;
270 	conn->data_in_cnt = 0;
271 	pthread_mutex_unlock(&g_spdk_iscsi.mutex);
272 	conn->MaxRecvDataSegmentLength = 8192; /* RFC3720(12.12) */
273 
274 	conn->portal = portal;
275 	conn->pg_tag = portal->group->tag;
276 	conn->portal_host = strdup(portal->host);
277 	conn->portal_port = strdup(portal->port);
278 	conn->portal_cpumask = portal->cpumask;
279 	conn->sock = sock;
280 
281 	conn->state = ISCSI_CONN_STATE_INVALID;
282 	conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE;
283 	conn->ttt = 0;
284 
285 	conn->partial_text_parameter = NULL;
286 
287 	for (i = 0; i < MAX_CONNECTION_PARAMS; i++) {
288 		conn->conn_param_state_negotiated[i] = false;
289 	}
290 
291 	for (i = 0; i < MAX_SESSION_PARAMS; i++) {
292 		conn->sess_param_state_negotiated[i] = false;
293 	}
294 
295 	for (i = 0; i < DEFAULT_MAXR2T; i++) {
296 		conn->outstanding_r2t_tasks[i] = NULL;
297 	}
298 
299 	TAILQ_INIT(&conn->write_pdu_list);
300 	TAILQ_INIT(&conn->snack_pdu_list);
301 	TAILQ_INIT(&conn->queued_r2t_tasks);
302 	TAILQ_INIT(&conn->active_r2t_tasks);
303 	TAILQ_INIT(&conn->queued_datain_tasks);
304 	memset(&conn->open_lun_descs, 0, sizeof(conn->open_lun_descs));
305 
306 	rc = spdk_sock_getaddr(sock, conn->target_addr, sizeof conn->target_addr, NULL,
307 			       conn->initiator_addr, sizeof conn->initiator_addr, NULL);
308 	if (rc < 0) {
309 		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
310 		goto error_return;
311 	}
312 
313 	bufsize = 2 * 1024 * 1024;
314 	rc = spdk_sock_set_recvbuf(conn->sock, bufsize);
315 	if (rc != 0) {
316 		SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n");
317 	}
318 
319 	bufsize = 32 * 1024 * 1024 / g_spdk_iscsi.MaxConnections;
320 	if (bufsize > 2 * 1024 * 1024) {
321 		bufsize = 2 * 1024 * 1024;
322 	}
323 	rc = spdk_sock_set_sendbuf(conn->sock, bufsize);
324 	if (rc != 0) {
325 		SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n");
326 	}
327 
328 	/* set low water mark */
329 	rc = spdk_sock_set_recvlowat(conn->sock, 1);
330 	if (rc != 0) {
331 		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
332 		goto error_return;
333 	}
334 
335 	/* set default params */
336 	rc = spdk_iscsi_conn_params_init(&conn->params);
337 	if (rc < 0) {
338 		SPDK_ERRLOG("iscsi_conn_params_init() failed\n");
339 		goto error_return;
340 	}
341 	conn->logout_timer = NULL;
342 	conn->shutdown_timer = NULL;
343 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Launching connection on acceptor thread\n");
344 	conn->pending_task_cnt = 0;
345 
346 	conn->lcore = spdk_env_get_current_core();
347 	__sync_fetch_and_add(&g_num_connections[conn->lcore], 1);
348 
349 	spdk_iscsi_poll_group_add_conn(conn);
350 	return 0;
351 
352 error_return:
353 	spdk_iscsi_param_free(conn->params);
354 	free_conn(conn);
355 	return -1;
356 }
357 
358 void
359 spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
360 {
361 	if (pdu->task) {
362 		if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) {
363 			if (pdu->task->scsi.offset > 0) {
364 				conn->data_in_cnt--;
365 				if (pdu->bhs.flags & ISCSI_DATAIN_STATUS) {
366 					/* Free the primary task after the last subtask done */
367 					conn->data_in_cnt--;
368 					spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
369 				}
370 			}
371 		} else if (pdu->bhs.opcode == ISCSI_OP_SCSI_RSP &&
372 			   pdu->task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
373 			if (pdu->task->scsi.offset > 0) {
374 				spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task));
375 			}
376 		}
377 		spdk_iscsi_task_put(pdu->task);
378 	}
379 	spdk_put_pdu(pdu);
380 }
381 
382 static int spdk_iscsi_conn_free_tasks(struct spdk_iscsi_conn *conn)
383 {
384 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
385 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
386 
387 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
388 		TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
389 		spdk_iscsi_conn_free_pdu(conn, pdu);
390 	}
391 
392 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
393 		TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
394 		if (pdu->task) {
395 			spdk_iscsi_task_put(pdu->task);
396 		}
397 		spdk_put_pdu(pdu);
398 	}
399 
400 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
401 		if (!iscsi_task->is_queued) {
402 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
403 			spdk_iscsi_task_put(iscsi_task);
404 		}
405 	}
406 
407 	if (conn->pending_task_cnt) {
408 		return -1;
409 	}
410 
411 	return 0;
412 }
413 
414 static void
415 _spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn)
416 {
417 	if (conn == NULL) {
418 		return;
419 	}
420 
421 	spdk_iscsi_param_free(conn->params);
422 
423 	/*
424 	 * Each connection pre-allocates its next PDU - make sure these get
425 	 *  freed here.
426 	 */
427 	spdk_put_pdu(conn->pdu_in_progress);
428 
429 	free_conn(conn);
430 }
431 
432 static void
433 spdk_iscsi_conn_cleanup_backend(struct spdk_iscsi_conn *conn)
434 {
435 	int rc;
436 	struct spdk_iscsi_tgt_node *target;
437 
438 	if (conn->sess->connections > 1) {
439 		/* connection specific cleanup */
440 	} else if (!g_spdk_iscsi.AllowDuplicateIsid) {
441 		/* clean up all tasks to all LUNs for session */
442 		target = conn->sess->target;
443 		if (target != NULL) {
444 			rc = spdk_iscsi_tgt_node_cleanup_luns(conn, target);
445 			if (rc < 0) {
446 				SPDK_ERRLOG("target abort failed\n");
447 			}
448 		}
449 	}
450 }
451 
452 static void
453 spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn)
454 {
455 	struct spdk_iscsi_sess *sess;
456 	int idx;
457 	uint32_t i;
458 
459 	pthread_mutex_lock(&g_conns_mutex);
460 
461 	if (conn->sess == NULL) {
462 		goto end;
463 	}
464 
465 	idx = -1;
466 	sess = conn->sess;
467 	conn->sess = NULL;
468 
469 	for (i = 0; i < sess->connections; i++) {
470 		if (sess->conns[i] == conn) {
471 			idx = i;
472 			break;
473 		}
474 	}
475 
476 	if (idx < 0) {
477 		SPDK_ERRLOG("remove conn not found\n");
478 	} else {
479 		for (i = idx; i < sess->connections - 1; i++) {
480 			sess->conns[i] = sess->conns[i + 1];
481 		}
482 		sess->conns[sess->connections - 1] = NULL;
483 		sess->connections--;
484 
485 		if (sess->connections == 0) {
486 			/* cleanup last connection */
487 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI,
488 				      "cleanup last conn free sess\n");
489 			spdk_free_sess(sess);
490 		}
491 	}
492 
493 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Terminating connections(tsih %d): %d\n",
494 		      sess->tsih, sess->connections);
495 
496 end:
497 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "cleanup free conn\n");
498 	_spdk_iscsi_conn_free(conn);
499 
500 	pthread_mutex_unlock(&g_conns_mutex);
501 }
502 
503 static int
504 _spdk_iscsi_conn_check_shutdown(void *arg)
505 {
506 	struct spdk_iscsi_conn *conn = arg;
507 	int rc;
508 
509 	rc = spdk_iscsi_conn_free_tasks(conn);
510 	if (rc < 0) {
511 		return 1;
512 	}
513 
514 	spdk_poller_unregister(&conn->shutdown_timer);
515 
516 	spdk_iscsi_conn_stop(conn);
517 	spdk_iscsi_conn_free(conn);
518 
519 	return 1;
520 }
521 
522 static void
523 _spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
524 {
525 	int rc;
526 
527 	spdk_clear_all_transfer_task(conn, NULL, NULL);
528 	spdk_iscsi_poll_group_remove_conn_sock(conn);
529 	spdk_sock_close(&conn->sock);
530 	spdk_poller_unregister(&conn->logout_timer);
531 	spdk_poller_unregister(&conn->flush_poller);
532 
533 	rc = spdk_iscsi_conn_free_tasks(conn);
534 	if (rc < 0) {
535 		/* The connection cannot be freed yet. Check back later. */
536 		conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_shutdown, conn, 1000);
537 	} else {
538 		spdk_iscsi_conn_stop(conn);
539 		spdk_iscsi_conn_free(conn);
540 	}
541 }
542 
543 static int
544 _spdk_iscsi_conn_check_pending_tasks(void *arg)
545 {
546 	struct spdk_iscsi_conn *conn = arg;
547 
548 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
549 		return 1;
550 	}
551 
552 	spdk_poller_unregister(&conn->shutdown_timer);
553 
554 	_spdk_iscsi_conn_destruct(conn);
555 
556 	return 1;
557 }
558 
559 void
560 spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn)
561 {
562 	/* If a connection is already in exited status, just return */
563 	if (conn->state >= ISCSI_CONN_STATE_EXITED) {
564 		return;
565 	}
566 
567 	conn->state = ISCSI_CONN_STATE_EXITED;
568 
569 	if (conn->sess != NULL && conn->pending_task_cnt > 0) {
570 		spdk_iscsi_conn_cleanup_backend(conn);
571 	}
572 
573 	if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) {
574 		conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_pending_tasks, conn, 1000);
575 	} else {
576 		_spdk_iscsi_conn_destruct(conn);
577 	}
578 }
579 
580 static int
581 spdk_iscsi_get_active_conns(void)
582 {
583 	struct spdk_iscsi_conn *conn;
584 	int num = 0;
585 	int i;
586 
587 	pthread_mutex_lock(&g_conns_mutex);
588 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
589 		conn = spdk_find_iscsi_connection_by_id(i);
590 		if (conn == NULL) {
591 			continue;
592 		}
593 		num++;
594 	}
595 	pthread_mutex_unlock(&g_conns_mutex);
596 	return num;
597 }
598 
599 static void
600 spdk_iscsi_conns_cleanup(void)
601 {
602 	free(g_num_connections);
603 	munmap(g_conns_array, sizeof(struct spdk_iscsi_conn) *
604 	       MAX_ISCSI_CONNECTIONS);
605 	shm_unlink(g_shm_name);
606 	if (g_conns_array_fd >= 0) {
607 		close(g_conns_array_fd);
608 		g_conns_array_fd = -1;
609 	}
610 }
611 
/*
 * Event callback run after the last connection is gone: releases the
 * connection table resources and then signals that connection shutdown is
 * complete.  Neither event argument is used.
 */
static void
spdk_iscsi_conn_check_shutdown_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	spdk_iscsi_conns_cleanup();
	spdk_shutdown_iscsi_conns_done();
}
618 
619 static int
620 spdk_iscsi_conn_check_shutdown(void *arg)
621 {
622 	struct spdk_event *event;
623 
624 	if (spdk_iscsi_get_active_conns() != 0) {
625 		return 1;
626 	}
627 
628 	spdk_poller_unregister(&g_shutdown_timer);
629 	event = spdk_event_allocate(spdk_env_get_current_core(),
630 				    spdk_iscsi_conn_check_shutdown_cb, NULL, NULL);
631 	spdk_event_call(event);
632 
633 	return 1;
634 }
635 
636 static void
637 spdk_iscsi_conn_close_lun(struct spdk_iscsi_conn *conn, int lun_id)
638 {
639 	struct spdk_scsi_desc *desc;
640 
641 	desc = conn->open_lun_descs[lun_id];
642 	if (desc != NULL) {
643 		spdk_scsi_lun_free_io_channel(desc);
644 		spdk_scsi_lun_close(desc);
645 		conn->open_lun_descs[lun_id] = NULL;
646 	}
647 }
648 
649 static void
650 spdk_iscsi_conn_close_luns(struct spdk_iscsi_conn *conn)
651 {
652 	int i;
653 
654 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
655 		spdk_iscsi_conn_close_lun(conn, i);
656 	}
657 }
658 
659 static void
660 _iscsi_conn_remove_lun(void *arg1, void *arg2)
661 {
662 	struct spdk_iscsi_conn *conn = arg1;
663 	struct spdk_scsi_lun *lun = arg2;
664 	int lun_id = spdk_scsi_lun_get_id(lun);
665 	struct spdk_iscsi_pdu *pdu, *tmp_pdu;
666 	struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task;
667 
668 	/* If a connection is already in stating status, just return */
669 	if (conn->state >= ISCSI_CONN_STATE_EXITING) {
670 		return;
671 	}
672 
673 	spdk_clear_all_transfer_task(conn, lun, NULL);
674 	TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) {
675 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
676 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
677 			spdk_iscsi_conn_free_pdu(conn, pdu);
678 		}
679 	}
680 
681 	TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) {
682 		if (pdu->task && (lun == pdu->task->scsi.lun)) {
683 			TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq);
684 			spdk_iscsi_task_put(pdu->task);
685 			spdk_put_pdu(pdu);
686 		}
687 	}
688 
689 	TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) {
690 		if ((!iscsi_task->is_queued) && (lun == iscsi_task->scsi.lun)) {
691 			TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link);
692 			spdk_iscsi_task_put(iscsi_task);
693 		}
694 	}
695 
696 	spdk_iscsi_conn_close_lun(conn, lun_id);
697 }
698 
699 static void
700 spdk_iscsi_conn_remove_lun(struct spdk_scsi_lun *lun, void *remove_ctx)
701 {
702 	struct spdk_iscsi_conn *conn = remove_ctx;
703 	struct spdk_event *event;
704 
705 	event = spdk_event_allocate(conn->lcore, _iscsi_conn_remove_lun,
706 				    conn, lun);
707 	spdk_event_call(event);
708 }
709 
710 static void
711 spdk_iscsi_conn_open_luns(struct spdk_iscsi_conn *conn)
712 {
713 	int i, rc;
714 	struct spdk_scsi_lun *lun;
715 	struct spdk_scsi_desc *desc;
716 
717 	for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) {
718 		lun = spdk_scsi_dev_get_lun(conn->dev, i);
719 		if (lun == NULL) {
720 			continue;
721 		}
722 
723 		rc = spdk_scsi_lun_open(lun, spdk_iscsi_conn_remove_lun, conn, &desc);
724 		if (rc != 0) {
725 			goto error;
726 		}
727 
728 		rc = spdk_scsi_lun_allocate_io_channel(desc);
729 		if (rc != 0) {
730 			spdk_scsi_lun_close(desc);
731 			goto error;
732 		}
733 
734 		conn->open_lun_descs[i] = desc;
735 	}
736 
737 	return;
738 
739 error:
740 	spdk_iscsi_conn_close_luns(conn);
741 }
742 
743 /**
744  *  This function will stop executing the specified connection.
745  */
746 static void
747 spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn)
748 {
749 	struct spdk_iscsi_tgt_node *target;
750 
751 	if (conn->state == ISCSI_CONN_STATE_EXITED && conn->sess != NULL &&
752 	    conn->sess->session_type == SESSION_TYPE_NORMAL &&
753 	    conn->full_feature) {
754 		target = conn->sess->target;
755 		pthread_mutex_lock(&target->mutex);
756 		target->num_active_conns--;
757 		pthread_mutex_unlock(&target->mutex);
758 
759 		spdk_iscsi_conn_close_luns(conn);
760 	}
761 
762 	assert(conn->lcore == spdk_env_get_current_core());
763 
764 	__sync_fetch_and_sub(&g_num_connections[conn->lcore], 1);
765 	spdk_iscsi_poll_group_remove_conn(conn);
766 }
767 
768 void spdk_shutdown_iscsi_conns(void)
769 {
770 	struct spdk_iscsi_conn	*conn;
771 	int			i;
772 
773 	pthread_mutex_lock(&g_conns_mutex);
774 
775 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
776 		conn = spdk_find_iscsi_connection_by_id(i);
777 		if (conn == NULL) {
778 			continue;
779 		}
780 
781 		/* Do not set conn->state if the connection has already started exiting.
782 		  * This ensures we do not move a connection from EXITED state back to EXITING.
783 		  */
784 		if (conn->state < ISCSI_CONN_STATE_EXITING) {
785 			conn->state = ISCSI_CONN_STATE_EXITING;
786 		}
787 	}
788 
789 	pthread_mutex_unlock(&g_conns_mutex);
790 	g_shutdown_timer = spdk_poller_register(spdk_iscsi_conn_check_shutdown, NULL,
791 						1000);
792 }
793 
794 int
795 spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, const char *conn_match,
796 		      int drop_all)
797 {
798 	struct spdk_iscsi_conn	*xconn;
799 	const char			*xconn_match;
800 	int				i, num;
801 
802 	SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_drop_conns\n");
803 
804 	num = 0;
805 	pthread_mutex_lock(&g_conns_mutex);
806 	for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) {
807 		xconn = spdk_find_iscsi_connection_by_id(i);
808 
809 		if (xconn == NULL) {
810 			continue;
811 		}
812 
813 		if (xconn == conn) {
814 			continue;
815 		}
816 
817 		if (!drop_all && xconn->initiator_port == NULL) {
818 			continue;
819 		}
820 
821 		xconn_match =
822 			drop_all ? xconn->initiator_name : spdk_scsi_port_get_name(xconn->initiator_port);
823 
824 		if (!strcasecmp(conn_match, xconn_match) &&
825 		    conn->target == xconn->target) {
826 
827 			if (num == 0) {
828 				/*
829 				 * Only print this message before we report the
830 				 *  first dropped connection.
831 				 */
832 				SPDK_ERRLOG("drop old connections %s by %s\n",
833 					    conn->target->name, conn_match);
834 			}
835 
836 			SPDK_ERRLOG("exiting conn by %s (%s)\n",
837 				    xconn_match, xconn->initiator_addr);
838 			if (xconn->sess != NULL) {
839 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=%u\n", xconn->sess->tsih);
840 			} else {
841 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=xx\n");
842 			}
843 
844 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CID=%u\n", xconn->cid);
845 
846 			/* Do not set xconn->state if the connection has already started exiting.
847 			  * This ensures we do not move a connection from EXITED state back to EXITING.
848 			  */
849 			if (xconn->state < ISCSI_CONN_STATE_EXITING) {
850 				xconn->state = ISCSI_CONN_STATE_EXITING;
851 			}
852 			num++;
853 		}
854 	}
855 
856 	pthread_mutex_unlock(&g_conns_mutex);
857 
858 	if (num != 0) {
859 		SPDK_ERRLOG("exiting %d conns\n", num);
860 	}
861 
862 	return 0;
863 }
864 
865 /**
866  * \brief Reads data for the specified iSCSI connection from its TCP socket.
867  *
868  * The TCP socket is marked as non-blocking, so this function may not read
869  * all data requested.
870  *
871  * Returns SPDK_ISCSI_CONNECTION_FATAL if the recv() operation indicates a fatal
872  * error with the TCP connection (including if the TCP connection was closed
873  * unexpectedly.
874  *
875  * Otherwise returns the number of bytes successfully read.
876  */
877 int
878 spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn,
879 			   struct iovec *iov, int iovcnt)
880 {
881 	int ret;
882 
883 	if (iov == NULL || iovcnt == 0) {
884 		return 0;
885 	}
886 
887 	ret = spdk_sock_readv(conn->sock, iov, iovcnt);
888 
889 	if (ret > 0) {
890 		spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0);
891 		return ret;
892 	}
893 
894 	if (ret < 0) {
895 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
896 			return 0;
897 		}
898 
899 		/* For connect reset issue, do not output error log */
900 		if (errno == ECONNRESET) {
901 			SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_readv() failed, errno %d: %s\n",
902 				      errno, spdk_strerror(errno));
903 		} else {
904 			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
905 				    errno, spdk_strerror(errno));
906 		}
907 	}
908 
909 	/* connection closed */
910 	return SPDK_ISCSI_CONNECTION_FATAL;
911 }
912 
/*
 * Read up to `bytes` bytes from the connection's socket into `buf`.
 *
 * Thin wrapper that describes the buffer as a single iovec and forwards to
 * spdk_iscsi_conn_readv_data(); the return value is passed through unchanged.
 */
int
spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int bytes,
			  void *buf)
{
	struct iovec single = {
		.iov_base = buf,
		.iov_len = bytes,
	};

	return spdk_iscsi_conn_readv_data(conn, &single, 1);
}
924 
925 void
926 spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task)
927 {
928 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
929 
930 	spdk_iscsi_task_mgmt_response(task->conn, task);
931 	spdk_iscsi_task_put(task);
932 }
933 
934 static void
935 spdk_iscsi_task_copy_to_rsp_scsi_status(struct spdk_iscsi_task *primary,
936 					struct spdk_scsi_task *task)
937 {
938 	memcpy(primary->rsp_sense_data, task->sense_data, task->sense_data_len);
939 	primary->rsp_sense_data_len = task->sense_data_len;
940 	primary->rsp_scsi_status = task->status;
941 }
942 
943 static void
944 spdk_iscsi_task_copy_from_rsp_scsi_status(struct spdk_scsi_task *task,
945 		struct spdk_iscsi_task *primary)
946 {
947 	memcpy(task->sense_data, primary->rsp_sense_data,
948 	       primary->rsp_sense_data_len);
949 	task->sense_data_len = primary->rsp_sense_data_len;
950 	task->status = primary->rsp_scsi_status;
951 }
952 
953 static void
954 process_completed_read_subtask_list(struct spdk_iscsi_conn *conn,
955 				    struct spdk_iscsi_task *primary)
956 {
957 	struct spdk_iscsi_task *subtask, *tmp;
958 
959 	TAILQ_FOREACH_SAFE(subtask, &primary->subtask_list, subtask_link, tmp) {
960 		if (subtask->scsi.offset == primary->bytes_completed) {
961 			TAILQ_REMOVE(&primary->subtask_list, subtask, subtask_link);
962 			primary->bytes_completed += subtask->scsi.length;
963 			spdk_iscsi_task_response(conn, subtask);
964 			spdk_iscsi_task_put(subtask);
965 		} else {
966 			break;
967 		}
968 	}
969 }
970 
971 static void
972 process_read_task_completion(struct spdk_iscsi_conn *conn,
973 			     struct spdk_iscsi_task *task,
974 			     struct spdk_iscsi_task *primary)
975 {
976 	struct spdk_iscsi_task *tmp;
977 
978 	/* If the status of the completed subtask is the first failure,
979 	 * copy it to out-of-order subtasks and remember it as the status
980 	 * of the command,
981 	 *
982 	 * Even if the status of the completed task is success,
983 	 * there are any failed subtask ever, copy the first failed status
984 	 * to it.
985 	 */
986 	if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) {
987 		if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
988 			TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
989 				spdk_scsi_task_copy_status(&tmp->scsi, &task->scsi);
990 			}
991 			spdk_iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
992 		}
993 	} else if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
994 		spdk_iscsi_task_copy_from_rsp_scsi_status(&task->scsi, primary);
995 	}
996 
997 	if ((task != primary) &&
998 	    (task->scsi.offset != primary->bytes_completed)) {
999 		TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) {
1000 			if (task->scsi.offset < tmp->scsi.offset) {
1001 				TAILQ_INSERT_BEFORE(tmp, task, subtask_link);
1002 				return;
1003 			}
1004 		}
1005 
1006 		TAILQ_INSERT_TAIL(&primary->subtask_list, task, subtask_link);
1007 		return;
1008 	}
1009 
1010 	primary->bytes_completed += task->scsi.length;
1011 	spdk_iscsi_task_response(conn, task);
1012 
1013 	if ((task != primary) ||
1014 	    (task->scsi.transfer_len == task->scsi.length)) {
1015 		spdk_iscsi_task_put(task);
1016 	}
1017 	process_completed_read_subtask_list(conn, primary);
1018 
1019 	spdk_iscsi_conn_handle_queued_datain_tasks(conn);
1020 }
1021 
1022 void
1023 spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task)
1024 {
1025 	struct spdk_iscsi_task *primary;
1026 	struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task);
1027 	struct spdk_iscsi_conn *conn = task->conn;
1028 	struct spdk_iscsi_pdu *pdu = task->pdu;
1029 
1030 	spdk_trace_record(TRACE_ISCSI_TASK_DONE, conn->id, 0, (uintptr_t)task, 0);
1031 
1032 	task->is_queued = false;
1033 	primary = spdk_iscsi_task_get_primary(task);
1034 
1035 	if (spdk_iscsi_task_is_read(primary)) {
1036 		process_read_task_completion(conn, task, primary);
1037 	} else {
1038 		primary->bytes_completed += task->scsi.length;
1039 
1040 		/* If the status of the subtask is the first failure, remember it as
1041 		 * the status of the command and set it to the status of the primary
1042 		 * task later.
1043 		 *
1044 		 * If the first failed task is the primary, two copies can be avoided
1045 		 * but code simplicity is prioritized.
1046 		 */
1047 		if (task->scsi.status == SPDK_SCSI_STATUS_GOOD) {
1048 			if (task != primary) {
1049 				primary->scsi.data_transferred += task->scsi.data_transferred;
1050 			}
1051 		} else if (primary->rsp_scsi_status == SPDK_SCSI_STATUS_GOOD) {
1052 			spdk_iscsi_task_copy_to_rsp_scsi_status(primary, &task->scsi);
1053 		}
1054 
1055 		if (primary->bytes_completed == primary->scsi.transfer_len) {
1056 			spdk_del_transfer_task(conn, primary->tag);
1057 			if (primary->rsp_scsi_status != SPDK_SCSI_STATUS_GOOD) {
1058 				spdk_iscsi_task_copy_from_rsp_scsi_status(&primary->scsi, primary);
1059 			}
1060 			spdk_iscsi_task_response(conn, primary);
1061 			/*
1062 			 * Check if this is the last task completed for an iSCSI write
1063 			 *  that required child subtasks.  If task != primary, we know
1064 			 *  for sure that it was part of an iSCSI write with child subtasks.
1065 			 *  The trickier case is when the last task completed was the initial
1066 			 *  task - in this case the task will have a smaller length than
1067 			 *  the overall transfer length.
1068 			 */
1069 			if (task != primary || task->scsi.length != task->scsi.transfer_len) {
1070 				TAILQ_REMOVE(&conn->active_r2t_tasks, primary, link);
1071 				spdk_iscsi_task_put(primary);
1072 			}
1073 		}
1074 		spdk_iscsi_task_put(task);
1075 	}
1076 	if (!task->parent) {
1077 		spdk_trace_record(TRACE_ISCSI_PDU_COMPLETED, 0, 0, (uintptr_t)pdu, 0);
1078 	}
1079 }
1080 
1081 static int
1082 spdk_iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest,
1083 			  int data_digest)
1084 {
1085 	int data_len, enable_digest, total;
1086 
1087 	enable_digest = 1;
1088 	if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) {
1089 		enable_digest = 0;
1090 	}
1091 
1092 	total = ISCSI_BHS_LEN;
1093 
1094 	total += (4 * pdu->bhs.total_ahs_len);
1095 
1096 	if (enable_digest && header_digest) {
1097 		total += ISCSI_DIGEST_LEN;
1098 	}
1099 
1100 	data_len = DGET24(pdu->bhs.data_segment_len);
1101 	if (data_len > 0) {
1102 		total += ISCSI_ALIGN(data_len);
1103 		if (enable_digest && data_digest) {
1104 			total += ISCSI_DIGEST_LEN;
1105 		}
1106 	}
1107 
1108 	return total;
1109 }
1110 
1111 void
1112 spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn)
1113 {
1114 	uint64_t	tsc;
1115 
1116 	/**
1117 	  * This function will be executed by nop_poller of iSCSI polling group, so
1118 	  * we need to check the connection state first, then do the nop interval
1119 	  * expiration check work.
1120 	  */
1121 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1122 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1123 		return;
1124 	}
1125 
1126 	/* Check for nop interval expiration */
1127 	tsc = spdk_get_ticks();
1128 	if (conn->nop_outstanding) {
1129 		if ((tsc - conn->last_nopin) > (conn->timeout  * spdk_get_ticks_hz())) {
1130 			SPDK_ERRLOG("Timed out waiting for NOP-Out response from initiator\n");
1131 			SPDK_ERRLOG("  tsc=0x%lx, last_nopin=0x%lx\n", tsc, conn->last_nopin);
1132 			SPDK_ERRLOG("  initiator=%s, target=%s\n", conn->initiator_name,
1133 				    conn->target_short_name);
1134 			conn->state = ISCSI_CONN_STATE_EXITING;
1135 		}
1136 	} else if (tsc - conn->last_nopin > conn->nopininterval) {
1137 		spdk_iscsi_send_nopin(conn);
1138 	}
1139 }
1140 
1141 /**
1142  * \brief Makes one attempt to flush response PDUs back to the initiator.
1143  *
1144  * Builds a list of iovecs for response PDUs that must be sent back to the
1145  * initiator and passes it to writev().
1146  *
1147  * Since the socket is non-blocking, writev() may not be able to flush all
1148  * of the iovecs, and may even partially flush one of the iovecs.  In this
1149  * case, the partially flushed PDU will remain on the write_pdu_list with
1150  * an offset pointing to the next byte to be flushed.
1151  *
1152  * Returns 0 if all PDUs were flushed.
1153  *
1154  * Returns 1 if some PDUs could not be flushed due to lack of send buffer
1155  * space.
1156  *
1157  * Returns -1 if an exception error occurred indicating the TCP connection
1158  * should be closed.
1159  */
1160 static int
1161 spdk_iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn)
1162 {
1163 	const int num_iovs = 32;
1164 	struct iovec iovs[num_iovs];
1165 	struct iovec *iov = iovs;
1166 	int iovcnt = 0;
1167 	int bytes = 0;
1168 	uint32_t total_length = 0;
1169 	uint32_t mapped_length = 0;
1170 	struct spdk_iscsi_pdu *pdu;
1171 	int pdu_length;
1172 
1173 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1174 
1175 	if (pdu == NULL) {
1176 		return 0;
1177 	}
1178 
1179 	/*
1180 	 * Build up a list of iovecs for the first few PDUs in the
1181 	 *  connection's write_pdu_list. For the first PDU, check if it was
1182 	 *  partially written out the last time this function was called, and
1183 	 *  if so adjust the iovec array accordingly. This check is done in
1184 	 *  spdk_iscsi_build_iovs() and so applied to remaining PDUs too.
1185 	 *  But extra overhead is negligible.
1186 	 */
1187 	while (pdu != NULL && ((num_iovs - iovcnt) > 0)) {
1188 		iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt,
1189 						pdu, &mapped_length);
1190 		total_length += mapped_length;
1191 		pdu = TAILQ_NEXT(pdu, tailq);
1192 	}
1193 
1194 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt);
1195 
1196 	bytes = spdk_sock_writev(conn->sock, iov, iovcnt);
1197 	if (bytes == -1) {
1198 		if (errno == EWOULDBLOCK || errno == EAGAIN) {
1199 			return 1;
1200 		} else {
1201 			SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
1202 				    errno, spdk_strerror(errno));
1203 			return -1;
1204 		}
1205 	}
1206 
1207 	spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0);
1208 
1209 	pdu = TAILQ_FIRST(&conn->write_pdu_list);
1210 
1211 	/*
1212 	 * Free any PDUs that were fully written.  If a PDU was only
1213 	 *  partially written, update its writev_offset so that next
1214 	 *  time only the unwritten portion will be sent to writev().
1215 	 */
1216 	while (bytes > 0) {
1217 		pdu_length = spdk_iscsi_get_pdu_length(pdu,
1218 						       conn->header_digest,
1219 						       conn->data_digest);
1220 		pdu_length -= pdu->writev_offset;
1221 
1222 		if (bytes >= pdu_length) {
1223 			bytes -= pdu_length;
1224 			TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq);
1225 
1226 			if ((conn->full_feature) &&
1227 			    (conn->sess->ErrorRecoveryLevel >= 1) &&
1228 			    spdk_iscsi_is_deferred_free_pdu(pdu)) {
1229 				SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n",
1230 					      from_be32(&pdu->bhs.stat_sn));
1231 				TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu,
1232 						  tailq);
1233 			} else {
1234 				spdk_iscsi_conn_free_pdu(conn, pdu);
1235 			}
1236 
1237 			pdu = TAILQ_FIRST(&conn->write_pdu_list);
1238 		} else {
1239 			pdu->writev_offset += bytes;
1240 			bytes = 0;
1241 		}
1242 	}
1243 
1244 	return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1;
1245 }
1246 
1247 /**
1248  * \brief Flushes response PDUs back to the initiator.
1249  *
1250  * This function may return without all PDUs having flushed to the
1251  * underlying TCP socket buffer - for example, in the case where the
1252  * socket buffer is already full.
1253  *
1254  * During normal RUNNING connection state, if not all PDUs are flushed,
1255  * then subsequent calls to this routine will eventually flush
1256  * remaining PDUs.
1257  *
1258  * During other connection states (EXITING or LOGGED_OUT), this
1259  * function will spin until all PDUs have successfully been flushed.
1260  */
1261 static int
1262 spdk_iscsi_conn_flush_pdus(void *_conn)
1263 {
1264 	struct spdk_iscsi_conn *conn = _conn;
1265 	int rc;
1266 
1267 	if (conn->state == ISCSI_CONN_STATE_RUNNING) {
1268 		rc = spdk_iscsi_conn_flush_pdus_internal(conn);
1269 		if (rc == 0 && conn->flush_poller != NULL) {
1270 			spdk_poller_unregister(&conn->flush_poller);
1271 		} else if (rc == 1 && conn->flush_poller == NULL) {
1272 			conn->flush_poller = spdk_poller_register(spdk_iscsi_conn_flush_pdus,
1273 					     conn, 50);
1274 		}
1275 	} else {
1276 		/*
1277 		 * If the connection state is not RUNNING, then
1278 		 * keep trying to flush PDUs until our list is
1279 		 * empty - to make sure all data is sent before
1280 		 * closing the connection.
1281 		 */
1282 		do {
1283 			rc = spdk_iscsi_conn_flush_pdus_internal(conn);
1284 		} while (rc == 1);
1285 	}
1286 
1287 	if (rc < 0 && conn->state < ISCSI_CONN_STATE_EXITING) {
1288 		/*
1289 		 * If the poller has already started destruction of the connection,
1290 		 *  i.e. the socket read failed, then the connection state may already
1291 		 *  be EXITED.  We don't want to set it back to EXITING in that case.
1292 		 */
1293 		conn->state = ISCSI_CONN_STATE_EXITING;
1294 	}
1295 
1296 	return 1;
1297 }
1298 
1299 void
1300 spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu)
1301 {
1302 	uint32_t crc32c;
1303 
1304 	if (pdu->bhs.opcode != ISCSI_OP_LOGIN_RSP) {
1305 		/* Header Digest */
1306 		if (conn->header_digest) {
1307 			crc32c = spdk_iscsi_pdu_calc_header_digest(pdu);
1308 			MAKE_DIGEST_WORD(pdu->header_digest, crc32c);
1309 		}
1310 
1311 		/* Data Digest */
1312 		if (conn->data_digest && DGET24(pdu->bhs.data_segment_len) != 0) {
1313 			crc32c = spdk_iscsi_pdu_calc_data_digest(pdu);
1314 			MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
1315 		}
1316 	}
1317 
1318 	TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq);
1319 	spdk_iscsi_conn_flush_pdus(conn);
1320 }
1321 
1322 #define GET_PDU_LOOP_COUNT	16
1323 
1324 static int
1325 spdk_iscsi_conn_handle_incoming_pdus(struct spdk_iscsi_conn *conn)
1326 {
1327 	struct spdk_iscsi_pdu *pdu;
1328 	int i, rc;
1329 
1330 	/* Read new PDUs from network */
1331 	for (i = 0; i < GET_PDU_LOOP_COUNT; i++) {
1332 		rc = spdk_iscsi_read_pdu(conn, &pdu);
1333 		if (rc == 0) {
1334 			break;
1335 		} else if (rc == SPDK_ISCSI_CONNECTION_FATAL) {
1336 			return rc;
1337 		}
1338 
1339 		if (conn->state == ISCSI_CONN_STATE_LOGGED_OUT) {
1340 			SPDK_ERRLOG("pdu received after logout\n");
1341 			spdk_put_pdu(pdu);
1342 			return SPDK_ISCSI_CONNECTION_FATAL;
1343 		}
1344 
1345 		rc = spdk_iscsi_execute(conn, pdu);
1346 		spdk_put_pdu(pdu);
1347 		if (rc != 0) {
1348 			SPDK_ERRLOG("spdk_iscsi_execute() fatal error on %s(%s)\n",
1349 				    conn->target_port != NULL ? spdk_scsi_port_get_name(conn->target_port) : "NULL",
1350 				    conn->initiator_port != NULL ? spdk_scsi_port_get_name(conn->initiator_port) : "NULL");
1351 			return rc;
1352 		}
1353 
1354 		spdk_trace_record(TRACE_ISCSI_TASK_EXECUTED, 0, 0, (uintptr_t)pdu, 0);
1355 		if (conn->is_stopped) {
1356 			break;
1357 		}
1358 	}
1359 
1360 	return i;
1361 }
1362 
1363 static void
1364 spdk_iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
1365 {
1366 	struct spdk_iscsi_conn *conn = arg;
1367 	int rc;
1368 
1369 	assert(conn != NULL);
1370 
1371 	if ((conn->state == ISCSI_CONN_STATE_EXITED) ||
1372 	    (conn->state == ISCSI_CONN_STATE_EXITING)) {
1373 		return;
1374 	}
1375 
1376 	/* Handle incoming PDUs */
1377 	rc = spdk_iscsi_conn_handle_incoming_pdus(conn);
1378 	if (rc < 0) {
1379 		conn->state = ISCSI_CONN_STATE_EXITING;
1380 		spdk_iscsi_conn_flush_pdus(conn);
1381 	}
1382 }
1383 
1384 static void
1385 spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2)
1386 {
1387 	struct spdk_iscsi_conn *conn = arg1;
1388 
1389 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1390 		spdk_iscsi_conn_open_luns(conn);
1391 	}
1392 
1393 	/* The poller has been unregistered, so now we can re-register it on the new core. */
1394 	conn->lcore = spdk_env_get_current_core();
1395 	spdk_iscsi_poll_group_add_conn(conn);
1396 }
1397 
1398 void
1399 spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn)
1400 {
1401 	int				lcore;
1402 	struct spdk_event		*event;
1403 	struct spdk_iscsi_tgt_node *target;
1404 
1405 	lcore = spdk_iscsi_conn_allocate_reactor(conn->portal->cpumask);
1406 	if (conn->sess->session_type == SESSION_TYPE_NORMAL) {
1407 		target = conn->sess->target;
1408 		pthread_mutex_lock(&target->mutex);
1409 		target->num_active_conns++;
1410 		if (target->num_active_conns == 1) {
1411 			/**
1412 			 * This is the only active connection for this target node.
1413 			 *  Save the lcore in the target node so it can be used for
1414 			 *  any other connections to this target node.
1415 			 */
1416 			target->lcore = lcore;
1417 		} else {
1418 			/**
1419 			 * There are other active connections for this target node.
1420 			 *  Ignore the lcore specified by the allocator and use the
1421 			 *  the target node's lcore to ensure this connection runs on
1422 			 *  the same lcore as other connections for this target node.
1423 			 */
1424 			lcore = target->lcore;
1425 		}
1426 		pthread_mutex_unlock(&target->mutex);
1427 	}
1428 
1429 	spdk_iscsi_poll_group_remove_conn_sock(conn);
1430 	spdk_poller_unregister(&conn->flush_poller);
1431 	spdk_iscsi_conn_stop(conn);
1432 
1433 	__sync_fetch_and_add(&g_num_connections[lcore], 1);
1434 	conn->last_nopin = spdk_get_ticks();
1435 	event = spdk_event_allocate(lcore, spdk_iscsi_conn_full_feature_migrate,
1436 				    conn, NULL);
1437 	spdk_event_call(event);
1438 }
1439 
1440 void
1441 spdk_iscsi_conn_set_min_per_core(int count)
1442 {
1443 	g_connections_per_lcore = count;
1444 }
1445 
1446 int
1447 spdk_iscsi_conn_get_min_per_core(void)
1448 {
1449 	return g_connections_per_lcore;
1450 }
1451 
1452 static uint32_t
1453 spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask)
1454 {
1455 	uint32_t i, selected_core;
1456 	int32_t num_pollers, min_pollers;
1457 
1458 	min_pollers = INT_MAX;
1459 	selected_core = spdk_env_get_first_core();
1460 
1461 	SPDK_ENV_FOREACH_CORE(i) {
1462 		if (!spdk_cpuset_get_cpu(cpumask, i)) {
1463 			continue;
1464 		}
1465 
1466 		/* This core is running. Check how many pollers it already has. */
1467 		num_pollers = g_num_connections[i];
1468 
1469 		if ((num_pollers > 0) && (num_pollers < g_connections_per_lcore)) {
1470 			/* Fewer than the maximum connections per core,
1471 			 * but at least 1. Use this core.
1472 			 */
1473 			return i;
1474 		} else if (num_pollers < min_pollers) {
1475 			/* Track the core that has the minimum number of pollers
1476 			 * to be used if no cores meet our criteria
1477 			 */
1478 			selected_core = i;
1479 			min_pollers = num_pollers;
1480 		}
1481 	}
1482 
1483 	return selected_core;
1484 }
1485 
/**
 * \brief Poller callback that destroys a connection after logout.
 *
 * Registered by spdk_iscsi_conn_logout().
 *
 * \param arg The connection (struct spdk_iscsi_conn *).
 * \return Always -1.
 */
static int
logout_timeout(void *arg)
{
	struct spdk_iscsi_conn *logged_out_conn = arg;

	spdk_iscsi_conn_destruct(logged_out_conn);
	return -1;
}
1495 
1496 void
1497 spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn)
1498 {
1499 	conn->state = ISCSI_CONN_STATE_LOGGED_OUT;
1500 	conn->logout_timer = spdk_poller_register(logout_timeout, conn, ISCSI_LOGOUT_TIMEOUT * 1000000);
1501 }
1502 
/*
 * Trace registration for the iSCSI connection code: registers the connection
 * owner, the PDU object, and a description for each trace point in the
 * TRACE_GROUP_ISCSI group.
 */
SPDK_TRACE_REGISTER_FN(iscsi_conn_trace, "iscsi_conn", TRACE_GROUP_ISCSI)
{
	/* Owner and object identifiers, each tagged with a single character. */
	spdk_trace_register_owner(OWNER_ISCSI_CONN, 'c');
	spdk_trace_register_object(OBJECT_ISCSI_PDU, 'p');
	/* One description per trace point. */
	spdk_trace_register_description("ISCSI_READ_FROM_SOCKET_DONE", "",
					TRACE_ISCSI_READ_FROM_SOCKET_DONE,
					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_START", "", TRACE_ISCSI_FLUSH_WRITEBUF_START,
					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "iovec: ");
	spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_DONE", "", TRACE_ISCSI_FLUSH_WRITEBUF_DONE,
					OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "");
	spdk_trace_register_description("ISCSI_READ_PDU", "", TRACE_ISCSI_READ_PDU,
					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 1, 0, "opc:   ");
	spdk_trace_register_description("ISCSI_TASK_DONE", "", TRACE_ISCSI_TASK_DONE,
					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 0, 0, "");
	spdk_trace_register_description("ISCSI_TASK_QUEUE", "", TRACE_ISCSI_TASK_QUEUE,
					OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 1, 1, "pdu:   ");
	spdk_trace_register_description("ISCSI_TASK_EXECUTED", "", TRACE_ISCSI_TASK_EXECUTED,
					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
	spdk_trace_register_description("ISCSI_PDU_COMPLETED", "", TRACE_ISCSI_PDU_COMPLETED,
					OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, "");
}
1525