xref: /spdk/module/bdev/iscsi/bdev_iscsi.c (revision 307b8c112ffd90a26d53dd15fad67bd9038ef526)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/fd.h"
11 #include "spdk/thread.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 #include "spdk/rpc.h"
15 #include "spdk/string.h"
16 #include "spdk/iscsi_spec.h"
17 
18 #include "spdk/log.h"
19 #include "spdk/bdev_module.h"
20 
21 #include "iscsi/iscsi.h"
22 #include "iscsi/scsi-lowlevel.h"
23 
24 #include "bdev_iscsi.h"
25 
struct bdev_iscsi_lun;

/* Period of the poller that services pending connection requests. */
#define BDEV_ISCSI_CONNECTION_POLL_US 500 /* 0.5 ms */
/* Period of the fallback poller that services a LUN while it has no I/O channel. */
#define BDEV_ISCSI_NO_MAIN_CH_POLL_US 10000 /* 10ms */

#define BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DEFAULT	1000000ULL /* 1 s */
#define BDEV_ISCSI_TIMEOUT_DEFAULT 30 /* 30 s */
/* The timeout poller period is the configured timeout divided by this value. */
#define BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DIVISOR 30

#define DEFAULT_INITIATOR_NAME "iqn.2016-06.io.spdk:init"

/* MAXIMUM UNMAP LBA COUNT:
 * indicates the maximum number of LBAs that may be unmapped
 * by an UNMAP command.
 */
#define BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT (32768)

/* MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT:
 * indicates the maximum number of UNMAP block descriptors that
 * shall be contained in the parameter data transferred to the
 * device server for an UNMAP command.
 */
#define BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT (1)

static int bdev_iscsi_initialize(void);
static void bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun);
static void _bdev_iscsi_submit_request(void *_bdev_io);

/* Connection requests that have been started but not yet completed. */
static TAILQ_HEAD(, bdev_iscsi_conn_req) g_iscsi_conn_req = TAILQ_HEAD_INITIALIZER(
			g_iscsi_conn_req);
/* Poller servicing g_iscsi_conn_req; NULL while it is not registered. */
static struct spdk_poller *g_conn_poller = NULL;
57 
58 struct bdev_iscsi_io {
59 	struct spdk_thread *submit_td;
60 	struct bdev_iscsi_lun *lun;
61 	enum spdk_bdev_io_status status;
62 	int scsi_status;
63 	enum spdk_scsi_sense sk;
64 	uint8_t asc;
65 	uint8_t ascq;
66 };
67 
68 struct bdev_iscsi_lun {
69 	struct spdk_bdev		bdev;
70 	struct iscsi_context		*context;
71 	char				*initiator_iqn;
72 	int				lun_id;
73 	char				*url;
74 	pthread_mutex_t			mutex;
75 	uint32_t			ch_count;
76 	struct spdk_thread		*main_td;
77 	struct spdk_poller		*no_main_ch_poller;
78 	struct spdk_thread		*no_main_ch_poller_td;
79 	bool				unmap_supported;
80 	uint32_t			max_unmap;
81 	struct spdk_poller		*poller;
82 	struct spdk_poller		*timeout_poller;
83 };
84 
/* Per-channel context allocated by the io_device framework. */
struct bdev_iscsi_io_channel {
	/* Owning LUN (assigned by the channel create callback). */
	struct bdev_iscsi_lun	*lun;
};
88 
89 struct bdev_iscsi_conn_req {
90 	char					*url;
91 	char					*bdev_name;
92 	char					*initiator_iqn;
93 	struct iscsi_context			*context;
94 	spdk_bdev_iscsi_create_cb		create_cb;
95 	void					*create_cb_arg;
96 	bool					unmap_supported;
97 	uint32_t				max_unmap;
98 	int					lun;
99 	int					status;
100 	TAILQ_ENTRY(bdev_iscsi_conn_req)	link;
101 };
102 
103 static struct spdk_bdev_iscsi_opts g_opts = {
104 	.timeout_sec = BDEV_ISCSI_TIMEOUT_DEFAULT,
105 	.timeout_poller_period_us = BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DEFAULT,
106 };
107 
108 void
109 bdev_iscsi_get_opts(struct spdk_bdev_iscsi_opts *opts)
110 {
111 	*opts = g_opts;
112 }
113 
114 int
115 bdev_iscsi_set_opts(struct spdk_bdev_iscsi_opts *opts)
116 {
117 	/* make the poller period equal to timeout / 30 */
118 	opts->timeout_poller_period_us = (opts->timeout_sec * 1000000ULL) /
119 					 BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DIVISOR;
120 
121 	g_opts = *opts;
122 
123 	return 0;
124 }
125 
126 static void
127 complete_conn_req(struct bdev_iscsi_conn_req *req, struct spdk_bdev *bdev,
128 		  int status)
129 {
130 	TAILQ_REMOVE(&g_iscsi_conn_req, req, link);
131 	req->create_cb(req->create_cb_arg, bdev, status);
132 
133 	/*
134 	 * we are still running in the context of iscsi_service()
135 	 * so do not tear down its data structures here
136 	 */
137 	req->status = status;
138 }
139 
140 static int
141 bdev_iscsi_get_ctx_size(void)
142 {
143 	return sizeof(struct bdev_iscsi_io);
144 }
145 
146 static void
147 _iscsi_free_lun(void *arg)
148 {
149 	struct bdev_iscsi_lun *lun = arg;
150 
151 	assert(lun != NULL);
152 	iscsi_destroy_context(lun->context);
153 	pthread_mutex_destroy(&lun->mutex);
154 	free(lun->bdev.name);
155 	free(lun->url);
156 	free(lun->initiator_iqn);
157 
158 	spdk_bdev_destruct_done(&lun->bdev, 0);
159 	free(lun);
160 }
161 
162 static void
163 _bdev_iscsi_conn_req_free(struct bdev_iscsi_conn_req *req)
164 {
165 	free(req->initiator_iqn);
166 	free(req->bdev_name);
167 	free(req->url);
168 	/* destroy will call iscsi_disconnect() implicitly if connected */
169 	iscsi_destroy_context(req->context);
170 	free(req);
171 }
172 
173 static void
174 bdev_iscsi_finish(void)
175 {
176 	struct bdev_iscsi_conn_req *req, *tmp;
177 
178 	/* clear out pending connection requests here. We cannot
179 	 * simply set the state to a non SCSI_STATUS_GOOD state as
180 	 * the connection poller wont run anymore
181 	 */
182 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
183 		_bdev_iscsi_conn_req_free(req);
184 	}
185 
186 	if (g_conn_poller) {
187 		spdk_poller_unregister(&g_conn_poller);
188 	}
189 }
190 
191 static struct spdk_bdev_module g_iscsi_bdev_module = {
192 	.name		= "iscsi",
193 	.module_init	= bdev_iscsi_initialize,
194 	.module_fini	= bdev_iscsi_finish,
195 	.get_ctx_size	= bdev_iscsi_get_ctx_size,
196 };
197 
198 SPDK_BDEV_MODULE_REGISTER(iscsi, &g_iscsi_bdev_module);
199 
200 static void
201 _bdev_iscsi_io_complete(void *_iscsi_io)
202 {
203 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
204 
205 	if (iscsi_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
206 		spdk_bdev_io_complete_scsi_status(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->scsi_status,
207 						  iscsi_io->sk, iscsi_io->asc, iscsi_io->ascq);
208 	} else {
209 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->status);
210 	}
211 }
212 
213 static void
214 bdev_iscsi_io_complete(struct bdev_iscsi_io *iscsi_io, enum spdk_bdev_io_status status)
215 {
216 	iscsi_io->status = status;
217 	if (iscsi_io->submit_td != NULL) {
218 		spdk_thread_send_msg(iscsi_io->submit_td, _bdev_iscsi_io_complete, iscsi_io);
219 	} else {
220 		_bdev_iscsi_io_complete(iscsi_io);
221 	}
222 }
223 
224 static bool
225 _bdev_iscsi_is_size_change(int status, struct scsi_task *task)
226 {
227 	if (status == SPDK_SCSI_STATUS_CHECK_CONDITION &&
228 	    (uint8_t)task->sense.key == SPDK_SCSI_SENSE_UNIT_ATTENTION &&
229 	    task->sense.ascq == 0x2a09) {
230 		/* ASCQ: SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED (0x2a09) */
231 		return true;
232 	}
233 
234 	return false;
235 }
236 
237 /* Common call back function for read/write/flush command */
238 static void
239 bdev_iscsi_command_cb(struct iscsi_context *context, int status, void *_task, void *_iscsi_io)
240 {
241 	struct scsi_task *task = _task;
242 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
243 	struct spdk_bdev_io *bdev_io;
244 
245 	iscsi_io->scsi_status = status;
246 	iscsi_io->sk = (uint8_t)task->sense.key;
247 	iscsi_io->asc = (task->sense.ascq >> 8) & 0xFF;
248 	iscsi_io->ascq = task->sense.ascq & 0xFF;
249 
250 	scsi_free_scsi_task(task);
251 
252 	if (_bdev_iscsi_is_size_change(status, task)) {
253 		bdev_iscsi_readcapacity16(context, iscsi_io->lun);
254 
255 		/* Retry this failed IO immediately */
256 		bdev_io = spdk_bdev_io_from_ctx(iscsi_io);
257 		if (iscsi_io->submit_td != NULL) {
258 			spdk_thread_send_msg(iscsi_io->lun->main_td,
259 					     _bdev_iscsi_submit_request, bdev_io);
260 		} else {
261 			_bdev_iscsi_submit_request(bdev_io);
262 		}
263 	} else {
264 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
265 	}
266 }
267 
268 static int
269 bdev_iscsi_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_block)
270 {
271 	int rc;
272 
273 	assert(bdev->module == &g_iscsi_bdev_module);
274 
275 	if (new_size_in_block <= bdev->blockcnt) {
276 		SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
277 		return -EINVAL;
278 	}
279 
280 	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_block);
281 	if (rc != 0) {
282 		SPDK_ERRLOG("failed to notify block cnt change.\n");
283 		return rc;
284 	}
285 
286 	return 0;
287 }
288 
289 static void
290 bdev_iscsi_readcapacity16_cb(struct iscsi_context *context, int status, void *_task,
291 			     void *private_data)
292 {
293 	struct bdev_iscsi_lun *lun = private_data;
294 	struct scsi_readcapacity16 *readcap16;
295 	struct scsi_task *task = _task;
296 	uint64_t size_in_block = 0;
297 	int rc;
298 
299 	if (status != SPDK_SCSI_STATUS_GOOD) {
300 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(context));
301 		goto ret;
302 	}
303 
304 	readcap16 = scsi_datain_unmarshall(task);
305 	if (!readcap16) {
306 		SPDK_ERRLOG("Read capacity error\n");
307 		goto ret;
308 	}
309 
310 	size_in_block = readcap16->returned_lba + 1;
311 
312 	rc = bdev_iscsi_resize(&lun->bdev, size_in_block);
313 	if (rc != 0) {
314 		SPDK_ERRLOG("Bdev (%s) resize error: %d\n", lun->bdev.name, rc);
315 	}
316 
317 ret:
318 	scsi_free_scsi_task(task);
319 }
320 
321 static void
322 bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun)
323 {
324 	struct scsi_task *task;
325 
326 	task = iscsi_readcapacity16_task(context, lun->lun_id,
327 					 bdev_iscsi_readcapacity16_cb, lun);
328 	if (task == NULL) {
329 		SPDK_ERRLOG("failed to get readcapacity16_task\n");
330 	}
331 }
332 
333 static void
334 bdev_iscsi_readv(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
335 		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
336 {
337 	struct scsi_task *task;
338 
339 	SPDK_DEBUGLOG(iscsi_init, "read %d iovs size %lu to lba: %#lx\n",
340 		      iovcnt, nbytes, lba);
341 
342 	task = iscsi_read16_task(lun->context, lun->lun_id, lba, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0,
343 				 bdev_iscsi_command_cb, iscsi_io);
344 	if (task == NULL) {
345 		SPDK_ERRLOG("failed to get read16_task\n");
346 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
347 		return;
348 	}
349 
350 #if defined(LIBISCSI_FEATURE_IOVECTOR)
351 	scsi_task_set_iov_in(task, (struct scsi_iovec *)iov, iovcnt);
352 #else
353 	int i;
354 	for (i = 0; i < iovcnt; i++) {
355 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
356 	}
357 #endif
358 }
359 
360 static void
361 bdev_iscsi_writev(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
362 		  struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
363 {
364 	struct scsi_task *task;
365 
366 	SPDK_DEBUGLOG(iscsi_init, "write %d iovs size %lu to lba: %#lx\n",
367 		      iovcnt, nbytes, lba);
368 
369 	task = iscsi_write16_task(lun->context, lun->lun_id, lba, NULL, nbytes, lun->bdev.blocklen, 0, 0, 0,
370 				  0, 0,
371 				  bdev_iscsi_command_cb, iscsi_io);
372 	if (task == NULL) {
373 		SPDK_ERRLOG("failed to get write16_task\n");
374 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
375 		return;
376 	}
377 
378 #if defined(LIBISCSI_FEATURE_IOVECTOR)
379 	scsi_task_set_iov_out(task, (struct scsi_iovec *)iov, iovcnt);
380 #else
381 	int i;
382 	for (i = 0; i < iovcnt; i++) {
383 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
384 	}
385 #endif
386 }
387 
388 static void
389 bdev_iscsi_destruct_cb(void *ctx)
390 {
391 	struct bdev_iscsi_lun *lun = ctx;
392 
393 	spdk_poller_unregister(&lun->no_main_ch_poller);
394 	spdk_io_device_unregister(lun, _iscsi_free_lun);
395 }
396 
397 static int
398 bdev_iscsi_destruct(void *ctx)
399 {
400 	struct bdev_iscsi_lun *lun = ctx;
401 
402 	assert(lun->no_main_ch_poller_td);
403 	spdk_thread_send_msg(lun->no_main_ch_poller_td, bdev_iscsi_destruct_cb, lun);
404 	return 1;
405 }
406 
407 static void
408 bdev_iscsi_flush(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, uint32_t num_blocks,
409 		 int immed, uint64_t lba)
410 {
411 	struct scsi_task *task;
412 
413 	task = iscsi_synchronizecache16_task(lun->context, lun->lun_id, lba,
414 					     num_blocks, 0, immed, bdev_iscsi_command_cb, iscsi_io);
415 	if (task == NULL) {
416 		SPDK_ERRLOG("failed to get sync16_task\n");
417 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
418 		return;
419 	}
420 }
421 
422 static void
423 bdev_iscsi_unmap(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
424 		 uint64_t lba, uint64_t num_blocks)
425 {
426 	struct scsi_task *task;
427 	struct unmap_list list[BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT] = {};
428 	struct unmap_list *entry;
429 	uint32_t num_unmap_list;
430 	uint64_t offset, remaining, unmap_blocks;
431 
432 	num_unmap_list = spdk_divide_round_up(num_blocks, lun->max_unmap);
433 	if (num_unmap_list > BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT) {
434 		SPDK_ERRLOG("Too many unmap entries\n");
435 		goto failed;
436 	}
437 
438 	remaining = num_blocks;
439 	offset = lba;
440 	num_unmap_list = 0;
441 	entry = &list[0];
442 
443 	do {
444 		unmap_blocks = spdk_min(remaining, lun->max_unmap);
445 		entry->lba = offset;
446 		entry->num = unmap_blocks;
447 		num_unmap_list++;
448 		remaining -= unmap_blocks;
449 		offset += unmap_blocks;
450 		entry++;
451 	} while (remaining > 0);
452 
453 	task = iscsi_unmap_task(lun->context, 0, 0, 0, list, num_unmap_list,
454 				bdev_iscsi_command_cb, iscsi_io);
455 	if (task != NULL) {
456 		return;
457 	}
458 	SPDK_ERRLOG("failed to get unmap_task\n");
459 
460 failed:
461 	bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
462 }
463 
464 static void
465 bdev_iscsi_reset_cb(struct iscsi_context *context __attribute__((unused)), int status,
466 		    void *command_data, void *private_data)
467 {
468 	uint32_t tmf_response;
469 	struct bdev_iscsi_io *iscsi_io = private_data;
470 
471 	tmf_response = *(uint32_t *)command_data;
472 	if (tmf_response == ISCSI_TASK_FUNC_RESP_COMPLETE) {
473 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
474 	} else {
475 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
476 	}
477 }
478 
479 static void
480 _bdev_iscsi_reset(void *_bdev_io)
481 {
482 	int rc;
483 	struct spdk_bdev_io *bdev_io = _bdev_io;
484 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
485 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
486 	struct iscsi_context *context = lun->context;
487 
488 	rc = iscsi_task_mgmt_lun_reset_async(context, lun->lun_id,
489 					     bdev_iscsi_reset_cb, iscsi_io);
490 	if (rc != 0) {
491 		SPDK_ERRLOG("failed to do iscsi reset\n");
492 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
493 		return;
494 	}
495 }
496 
497 static void
498 bdev_iscsi_reset(struct spdk_bdev_io *bdev_io)
499 {
500 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
501 	spdk_thread_send_msg(lun->main_td, _bdev_iscsi_reset, bdev_io);
502 }
503 
504 static int
505 bdev_iscsi_poll_lun(void *_lun)
506 {
507 	struct bdev_iscsi_lun *lun = _lun;
508 	struct pollfd pfd = {};
509 
510 	pfd.fd = iscsi_get_fd(lun->context);
511 	pfd.events = iscsi_which_events(lun->context);
512 
513 	if (poll(&pfd, 1, 0) < 0) {
514 		SPDK_ERRLOG("poll failed\n");
515 		return SPDK_POLLER_IDLE;
516 	}
517 
518 	if (pfd.revents != 0) {
519 		if (iscsi_service(lun->context, pfd.revents) < 0) {
520 			SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(lun->context));
521 		}
522 
523 		return SPDK_POLLER_BUSY;
524 	}
525 
526 	return SPDK_POLLER_IDLE;
527 }
528 
529 static int
530 bdev_iscsi_poll_lun_timeout(void *_lun)
531 {
532 	struct bdev_iscsi_lun *lun = _lun;
533 	/* passing 0 here to iscsi_service means do nothing except for timeout checks */
534 	iscsi_service(lun->context, 0);
535 	return SPDK_POLLER_BUSY;
536 }
537 
538 static int
539 bdev_iscsi_no_main_ch_poll(void *arg)
540 {
541 	struct bdev_iscsi_lun *lun = arg;
542 	enum spdk_thread_poller_rc rc = SPDK_POLLER_IDLE;
543 
544 	if (pthread_mutex_trylock(&lun->mutex)) {
545 		/* Don't care about the error code here. */
546 		return SPDK_POLLER_IDLE;
547 	}
548 
549 	if (lun->ch_count == 0) {
550 		rc = bdev_iscsi_poll_lun(arg);
551 	}
552 
553 	pthread_mutex_unlock(&lun->mutex);
554 	return rc;
555 }
556 
557 static void
558 bdev_iscsi_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
559 		      bool success)
560 {
561 	if (!success) {
562 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
563 		return;
564 	}
565 
566 	bdev_iscsi_readv((struct bdev_iscsi_lun *)bdev_io->bdev->ctxt,
567 			 (struct bdev_iscsi_io *)bdev_io->driver_ctx,
568 			 bdev_io->u.bdev.iovs,
569 			 bdev_io->u.bdev.iovcnt,
570 			 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
571 			 bdev_io->u.bdev.offset_blocks);
572 }
573 
574 static void
575 _bdev_iscsi_submit_request(void *_bdev_io)
576 {
577 	struct spdk_bdev_io *bdev_io = _bdev_io;
578 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
579 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
580 
581 	switch (bdev_io->type) {
582 	case SPDK_BDEV_IO_TYPE_READ:
583 		spdk_bdev_io_get_buf(bdev_io, bdev_iscsi_get_buf_cb,
584 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
585 		break;
586 
587 	case SPDK_BDEV_IO_TYPE_WRITE:
588 		bdev_iscsi_writev(lun, iscsi_io,
589 				  bdev_io->u.bdev.iovs,
590 				  bdev_io->u.bdev.iovcnt,
591 				  bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
592 				  bdev_io->u.bdev.offset_blocks);
593 		break;
594 	case SPDK_BDEV_IO_TYPE_FLUSH:
595 		bdev_iscsi_flush(lun, iscsi_io,
596 				 bdev_io->u.bdev.num_blocks,
597 				 ISCSI_IMMEDIATE_DATA_NO,
598 				 bdev_io->u.bdev.offset_blocks);
599 		break;
600 	case SPDK_BDEV_IO_TYPE_RESET:
601 		bdev_iscsi_reset(bdev_io);
602 		break;
603 	case SPDK_BDEV_IO_TYPE_UNMAP:
604 		bdev_iscsi_unmap(lun, iscsi_io,
605 				 bdev_io->u.bdev.offset_blocks,
606 				 bdev_io->u.bdev.num_blocks);
607 		break;
608 	default:
609 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
610 		break;
611 	}
612 }
613 
614 static void
615 bdev_iscsi_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
616 {
617 	struct spdk_thread *submit_td = spdk_io_channel_get_thread(_ch);
618 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
619 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
620 
621 	iscsi_io->lun = lun;
622 
623 	if (lun->main_td != submit_td) {
624 		iscsi_io->submit_td = submit_td;
625 		spdk_thread_send_msg(lun->main_td, _bdev_iscsi_submit_request, bdev_io);
626 		return;
627 	} else {
628 		iscsi_io->submit_td = NULL;
629 	}
630 
631 	_bdev_iscsi_submit_request(bdev_io);
632 }
633 
634 static bool
635 bdev_iscsi_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
636 {
637 	struct bdev_iscsi_lun *lun = ctx;
638 
639 	switch (io_type) {
640 	case SPDK_BDEV_IO_TYPE_READ:
641 	case SPDK_BDEV_IO_TYPE_WRITE:
642 	case SPDK_BDEV_IO_TYPE_FLUSH:
643 	case SPDK_BDEV_IO_TYPE_RESET:
644 		return true;
645 
646 	case SPDK_BDEV_IO_TYPE_UNMAP:
647 		return lun->unmap_supported;
648 	default:
649 		return false;
650 	}
651 }
652 
653 static int
654 bdev_iscsi_create_cb(void *io_device, void *ctx_buf)
655 {
656 	struct bdev_iscsi_io_channel *ch = ctx_buf;
657 	struct bdev_iscsi_lun *lun = io_device;
658 
659 	pthread_mutex_lock(&lun->mutex);
660 	if (lun->ch_count == 0) {
661 		assert(lun->main_td == NULL);
662 		lun->main_td = spdk_get_thread();
663 		lun->poller = SPDK_POLLER_REGISTER(bdev_iscsi_poll_lun, lun, 0);
664 		if (g_opts.timeout_sec > 0) {
665 			lun->timeout_poller = SPDK_POLLER_REGISTER(bdev_iscsi_poll_lun_timeout, lun,
666 					      g_opts.timeout_poller_period_us);
667 		}
668 		ch->lun = lun;
669 	}
670 	lun->ch_count++;
671 	pthread_mutex_unlock(&lun->mutex);
672 
673 	return 0;
674 }
675 
676 static void
677 _iscsi_destroy_cb(void *ctx)
678 {
679 	struct bdev_iscsi_lun *lun = ctx;
680 
681 	pthread_mutex_lock(&lun->mutex);
682 
683 	assert(lun->main_td == spdk_get_thread());
684 	assert(lun->ch_count > 0);
685 
686 	lun->ch_count--;
687 	if (lun->ch_count > 0) {
688 		pthread_mutex_unlock(&lun->mutex);
689 		return;
690 	}
691 
692 	lun->main_td = NULL;
693 	spdk_poller_unregister(&lun->poller);
694 	spdk_poller_unregister(&lun->timeout_poller);
695 
696 	pthread_mutex_unlock(&lun->mutex);
697 }
698 
699 static void
700 bdev_iscsi_destroy_cb(void *io_device, void *ctx_buf)
701 {
702 	struct bdev_iscsi_lun *lun = io_device;
703 	struct spdk_thread *thread;
704 
705 	pthread_mutex_lock(&lun->mutex);
706 	lun->ch_count--;
707 	if (lun->ch_count == 0) {
708 		assert(lun->main_td != NULL);
709 
710 		if (lun->main_td != spdk_get_thread()) {
711 			/* The final channel was destroyed on a different thread
712 			 * than where the first channel was created. Pass a message
713 			 * to the main thread to unregister the poller. */
714 			lun->ch_count++;
715 			thread = lun->main_td;
716 			pthread_mutex_unlock(&lun->mutex);
717 			spdk_thread_send_msg(thread, _iscsi_destroy_cb, lun);
718 			return;
719 		}
720 
721 		lun->main_td = NULL;
722 		spdk_poller_unregister(&lun->poller);
723 		spdk_poller_unregister(&lun->timeout_poller);
724 	}
725 	pthread_mutex_unlock(&lun->mutex);
726 }
727 
/* Return an I/O channel for the LUN, which doubles as the io_device handle. */
static struct spdk_io_channel *
bdev_iscsi_get_io_channel(void *ctx)
{
	struct bdev_iscsi_lun *iscsi_lun = ctx;

	return spdk_get_io_channel(iscsi_lun);
}
735 
736 static int
737 bdev_iscsi_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
738 {
739 	struct bdev_iscsi_lun *lun = ctx;
740 
741 	spdk_json_write_named_object_begin(w, "iscsi");
742 	spdk_json_write_named_string(w, "initiator_name", lun->initiator_iqn);
743 	spdk_json_write_named_string(w, "url", lun->url);
744 	spdk_json_write_object_end(w);
745 
746 	return 0;
747 }
748 
749 static void
750 bdev_iscsi_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
751 {
752 	struct bdev_iscsi_lun *lun = bdev->ctxt;
753 
754 	pthread_mutex_lock(&lun->mutex);
755 	spdk_json_write_object_begin(w);
756 
757 	spdk_json_write_named_string(w, "method", "bdev_iscsi_create");
758 
759 	spdk_json_write_named_object_begin(w, "params");
760 	spdk_json_write_named_string(w, "name", bdev->name);
761 	spdk_json_write_named_string(w, "initiator_iqn", lun->initiator_iqn);
762 	spdk_json_write_named_string(w, "url", lun->url);
763 	spdk_json_write_object_end(w);
764 
765 	spdk_json_write_object_end(w);
766 	pthread_mutex_unlock(&lun->mutex);
767 }
768 
769 static const struct spdk_bdev_fn_table iscsi_fn_table = {
770 	.destruct		= bdev_iscsi_destruct,
771 	.submit_request		= bdev_iscsi_submit_request,
772 	.io_type_supported	= bdev_iscsi_io_type_supported,
773 	.get_io_channel		= bdev_iscsi_get_io_channel,
774 	.dump_info_json		= bdev_iscsi_dump_info_json,
775 	.write_config_json	= bdev_iscsi_write_config_json,
776 };
777 
778 static int
779 create_iscsi_lun(struct bdev_iscsi_conn_req *req, uint64_t num_blocks,
780 		 uint32_t block_size, struct spdk_bdev **bdev, uint8_t lbppbe)
781 {
782 	struct bdev_iscsi_lun *lun;
783 	int rc;
784 
785 	lun = calloc(sizeof(*lun), 1);
786 	if (!lun) {
787 		SPDK_ERRLOG("Unable to allocate enough memory for iscsi backend\n");
788 		return -ENOMEM;
789 	}
790 
791 	lun->context = req->context;
792 	lun->lun_id = req->lun;
793 	lun->url = req->url;
794 	lun->initiator_iqn = req->initiator_iqn;
795 
796 	pthread_mutex_init(&lun->mutex, NULL);
797 
798 	lun->bdev.name = req->bdev_name;
799 	lun->bdev.product_name = "iSCSI LUN";
800 	lun->bdev.module = &g_iscsi_bdev_module;
801 	lun->bdev.blocklen = block_size;
802 	lun->bdev.phys_blocklen = block_size * (1 << lbppbe);
803 	lun->bdev.blockcnt = num_blocks;
804 	lun->bdev.ctxt = lun;
805 	lun->unmap_supported = req->unmap_supported;
806 	if (lun->unmap_supported) {
807 		lun->max_unmap = req->max_unmap;
808 		lun->bdev.max_unmap = req->max_unmap;
809 		lun->bdev.max_unmap_segments = BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT;
810 	}
811 
812 	lun->bdev.fn_table = &iscsi_fn_table;
813 
814 	spdk_io_device_register(lun, bdev_iscsi_create_cb, bdev_iscsi_destroy_cb,
815 				sizeof(struct bdev_iscsi_io_channel),
816 				req->bdev_name);
817 	rc = spdk_bdev_register(&lun->bdev);
818 	if (rc) {
819 		spdk_io_device_unregister(lun, NULL);
820 		pthread_mutex_destroy(&lun->mutex);
821 		free(lun);
822 		return rc;
823 	}
824 
825 	lun->no_main_ch_poller_td = spdk_get_thread();
826 	lun->no_main_ch_poller = SPDK_POLLER_REGISTER(bdev_iscsi_no_main_ch_poll, lun,
827 				 BDEV_ISCSI_NO_MAIN_CH_POLL_US);
828 
829 	*bdev = &lun->bdev;
830 	return 0;
831 }
832 
833 static void
834 iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
835 			void *command_data, void *private_data)
836 {
837 	struct bdev_iscsi_conn_req *req = private_data;
838 	struct scsi_readcapacity16 *readcap16;
839 	struct spdk_bdev *bdev = NULL;
840 	struct scsi_task *task = command_data;
841 	struct scsi_task *retry_task = NULL;
842 
843 	if (status != SPDK_SCSI_STATUS_GOOD) {
844 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(iscsi));
845 		if (_bdev_iscsi_is_size_change(status, task)) {
846 			scsi_free_scsi_task(task);
847 			retry_task = iscsi_readcapacity16_task(iscsi, req->lun,
848 							       iscsi_readcapacity16_cb, req);
849 			if (retry_task) {
850 				return;
851 			}
852 		}
853 		goto ret;
854 	}
855 
856 	readcap16 = scsi_datain_unmarshall(task);
857 	if (!readcap16) {
858 		status = -ENOMEM;
859 		goto ret;
860 	}
861 
862 	status = create_iscsi_lun(req, readcap16->returned_lba + 1, readcap16->block_length, &bdev,
863 				  readcap16->lbppbe);
864 	if (status) {
865 		SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status);
866 	}
867 
868 ret:
869 	scsi_free_scsi_task(task);
870 	complete_conn_req(req, bdev, status);
871 }
872 
873 static void
874 bdev_iscsi_inquiry_bl_cb(struct iscsi_context *context, int status, void *_task, void *private_data)
875 {
876 	struct scsi_task *task = _task;
877 	struct scsi_inquiry_block_limits *bl_inq = NULL;
878 	struct bdev_iscsi_conn_req *req = private_data;
879 
880 	if (status == SPDK_SCSI_STATUS_GOOD) {
881 		bl_inq = scsi_datain_unmarshall(task);
882 		if (bl_inq != NULL) {
883 			if (!bl_inq->max_unmap) {
884 				SPDK_ERRLOG("Invalid max_unmap, use the default\n");
885 				req->max_unmap = BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT;
886 			} else {
887 				req->max_unmap = bl_inq->max_unmap;
888 			}
889 		}
890 	}
891 
892 	scsi_free_scsi_task(task);
893 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
894 	if (task) {
895 		return;
896 	}
897 
898 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
899 	complete_conn_req(req, NULL, status);
900 }
901 
902 static void
903 bdev_iscsi_inquiry_lbp_cb(struct iscsi_context *context, int status, void *_task,
904 			  void *private_data)
905 {
906 	struct scsi_task *task = _task;
907 	struct scsi_inquiry_logical_block_provisioning *lbp_inq = NULL;
908 	struct bdev_iscsi_conn_req *req = private_data;
909 
910 	if (status == SPDK_SCSI_STATUS_GOOD) {
911 		lbp_inq = scsi_datain_unmarshall(task);
912 		if (lbp_inq != NULL && lbp_inq->lbpu) {
913 			req->unmap_supported = true;
914 			scsi_free_scsi_task(task);
915 
916 			task = iscsi_inquiry_task(context, req->lun, 1,
917 						  SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
918 						  255, bdev_iscsi_inquiry_bl_cb, req);
919 			if (task) {
920 				return;
921 			}
922 		}
923 	} else {
924 		scsi_free_scsi_task(task);
925 	}
926 
927 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
928 	if (task) {
929 		return;
930 	}
931 
932 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
933 	complete_conn_req(req, NULL, status);
934 }
935 
936 static void
937 iscsi_connect_cb(struct iscsi_context *iscsi, int status,
938 		 void *command_data, void *private_data)
939 {
940 	struct bdev_iscsi_conn_req *req = private_data;
941 	struct scsi_task *task;
942 
943 	if (status != SPDK_SCSI_STATUS_GOOD) {
944 		goto ret;
945 	}
946 
947 	task = iscsi_inquiry_task(iscsi, req->lun, 1,
948 				  SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
949 				  255, bdev_iscsi_inquiry_lbp_cb, req);
950 	if (task) {
951 		return;
952 	}
953 
954 ret:
955 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
956 	complete_conn_req(req, NULL, status);
957 }
958 
959 static int
960 iscsi_bdev_conn_poll(void *arg)
961 {
962 	struct bdev_iscsi_conn_req *req, *tmp;
963 	struct pollfd pfd;
964 	struct iscsi_context *context;
965 
966 	if (TAILQ_EMPTY(&g_iscsi_conn_req)) {
967 		spdk_poller_unregister(&g_conn_poller);
968 		return SPDK_POLLER_IDLE;
969 	}
970 
971 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
972 		context = req->context;
973 		pfd.fd = iscsi_get_fd(context);
974 		pfd.events = iscsi_which_events(context);
975 		pfd.revents = 0;
976 		if (poll(&pfd, 1, 0) < 0) {
977 			SPDK_ERRLOG("poll failed\n");
978 			return SPDK_POLLER_BUSY;
979 		}
980 
981 		if (pfd.revents != 0) {
982 			if (iscsi_service(context, pfd.revents) < 0) {
983 				SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(context));
984 			}
985 		}
986 
987 		if (req->status == 0) {
988 			/*
989 			 * The request completed successfully.
990 			 */
991 			free(req);
992 		} else if (req->status > 0) {
993 			/*
994 			 * An error has occurred during connecting.  This req has already
995 			 * been removed from the g_iscsi_conn_req list, but we needed to
996 			 * wait until iscsi_service unwound before we could free the req.
997 			 */
998 			_bdev_iscsi_conn_req_free(req);
999 		}
1000 	}
1001 	return SPDK_POLLER_BUSY;
1002 }
1003 
1004 int
1005 create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
1006 		  spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg)
1007 {
1008 	struct bdev_iscsi_conn_req *req;
1009 	struct iscsi_url *iscsi_url = NULL;
1010 	int rc;
1011 
1012 	if (!bdev_name || !url || !initiator_iqn || strlen(initiator_iqn) == 0 || !cb_fn) {
1013 		return -EINVAL;
1014 	}
1015 
1016 	req = calloc(1, sizeof(struct bdev_iscsi_conn_req));
1017 	if (!req) {
1018 		SPDK_ERRLOG("Cannot allocate pointer of struct bdev_iscsi_conn_req\n");
1019 		return -ENOMEM;
1020 	}
1021 
1022 	req->status = SCSI_STATUS_GOOD;
1023 	req->bdev_name = strdup(bdev_name);
1024 	req->url = strdup(url);
1025 	req->initiator_iqn = strdup(initiator_iqn);
1026 	req->context = iscsi_create_context(initiator_iqn);
1027 	if (!req->bdev_name || !req->url || !req->initiator_iqn || !req->context) {
1028 		SPDK_ERRLOG("Out of memory\n");
1029 		rc = -ENOMEM;
1030 		goto err;
1031 	}
1032 
1033 	req->create_cb = cb_fn;
1034 	req->create_cb_arg = cb_arg;
1035 
1036 	iscsi_url = iscsi_parse_full_url(req->context, url);
1037 	if (iscsi_url == NULL) {
1038 		SPDK_ERRLOG("could not parse URL: %s\n", iscsi_get_error(req->context));
1039 		rc = -EINVAL;
1040 		goto err;
1041 	}
1042 
1043 	req->lun = iscsi_url->lun;
1044 	rc = iscsi_set_session_type(req->context, ISCSI_SESSION_NORMAL);
1045 	rc = rc ? rc : iscsi_set_header_digest(req->context, ISCSI_HEADER_DIGEST_NONE);
1046 	rc = rc ? rc : iscsi_set_targetname(req->context, iscsi_url->target);
1047 	rc = rc ? rc : iscsi_set_timeout(req->context, g_opts.timeout_sec);
1048 	rc = rc ? rc : iscsi_full_connect_async(req->context, iscsi_url->portal, iscsi_url->lun,
1049 						iscsi_connect_cb, req);
1050 	if (rc == 0 && iscsi_url->user[0] != '\0') {
1051 		rc = iscsi_set_initiator_username_pwd(req->context, iscsi_url->user, iscsi_url->passwd);
1052 	}
1053 
1054 	if (rc < 0) {
1055 		SPDK_ERRLOG("Failed to connect provided URL=%s: %s\n", url, iscsi_get_error(req->context));
1056 		goto err;
1057 	}
1058 
1059 	iscsi_destroy_url(iscsi_url);
1060 	req->status = -1;
1061 	TAILQ_INSERT_TAIL(&g_iscsi_conn_req, req, link);
1062 	if (!g_conn_poller) {
1063 		g_conn_poller = SPDK_POLLER_REGISTER(iscsi_bdev_conn_poll, NULL, BDEV_ISCSI_CONNECTION_POLL_US);
1064 	}
1065 
1066 	return 0;
1067 
1068 err:
1069 	/* iscsi_destroy_url() is not NULL-proof */
1070 	if (iscsi_url) {
1071 		iscsi_destroy_url(iscsi_url);
1072 	}
1073 
1074 	if (req->context) {
1075 		iscsi_destroy_context(req->context);
1076 	}
1077 
1078 	free(req->initiator_iqn);
1079 	free(req->bdev_name);
1080 	free(req->url);
1081 	free(req);
1082 	return rc;
1083 }
1084 
1085 void
1086 delete_iscsi_disk(const char *bdev_name, spdk_delete_iscsi_complete cb_fn, void *cb_arg)
1087 {
1088 	int rc;
1089 
1090 	rc = spdk_bdev_unregister_by_name(bdev_name, &g_iscsi_bdev_module, cb_fn, cb_arg);
1091 	if (rc != 0) {
1092 		cb_fn(cb_arg, rc);
1093 	}
1094 }
1095 
/* Module init hook; there is nothing to set up, so it always returns 0. */
static int
bdev_iscsi_initialize(void)
{
	const int rc = 0;

	return rc;
}
1101 
1102 SPDK_LOG_REGISTER_COMPONENT(iscsi_init)
1103