xref: /spdk/module/bdev/iscsi/bdev_iscsi.c (revision 95d6c9fac17572b107042103439aafd696d60b0e)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/fd.h"
11 #include "spdk/thread.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 #include "spdk/rpc.h"
15 #include "spdk/string.h"
16 #include "spdk/iscsi_spec.h"
17 
18 #include "spdk/log.h"
19 #include "spdk/bdev_module.h"
20 
21 #include "iscsi/iscsi.h"
22 #include "iscsi/scsi-lowlevel.h"
23 
24 #include "bdev_iscsi.h"
25 
/* Forward declaration: struct bdev_iscsi_io refers to the LUN before it is defined. */
struct bdev_iscsi_lun;

/* Period of the global connection-request poller (g_conn_poller). */
#define BDEV_ISCSI_CONNECTION_POLL_US 500 /* 0.5 ms */
/* Period of the per-LUN poller that services the connection while ch_count is 0. */
#define BDEV_ISCSI_NO_MAIN_CH_POLL_US 10000 /* 10ms */

/* Default period of the per-LUN timeout poller. */
#define BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DEFAULT	1000000ULL /* 1 s */
/* Default value of the timeout_sec option. */
#define BDEV_ISCSI_TIMEOUT_DEFAULT 30 /* 30 s */
/* bdev_iscsi_set_opts() derives the timeout poller period as timeout / this divisor. */
#define BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DIVISOR 30

#define DEFAULT_INITIATOR_NAME "iqn.2016-06.io.spdk:init"

/* MAXIMUM UNMAP LBA COUNT:
 * indicates the maximum  number of LBAs that may be unmapped
 * by an UNMAP command.
 */
#define BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT (32768)

/* MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT:
 * indicates the maximum number of UNMAP block descriptors that
 * shall be contained in the parameter data transferred to the
 * device server for an UNMAP command.
 */
#define BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT (1)

/* Prototypes for functions that are referenced before their definition. */
static int bdev_iscsi_initialize(void);
static void bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun);
static void _bdev_iscsi_submit_request(void *_bdev_io);

/* Connection requests that are still being serviced by iscsi_bdev_conn_poll(). */
static TAILQ_HEAD(, bdev_iscsi_conn_req) g_iscsi_conn_req = TAILQ_HEAD_INITIALIZER(
			g_iscsi_conn_req);
/* Poller driving g_iscsi_conn_req; registered on demand by create_iscsi_disk(). */
static struct spdk_poller *g_conn_poller = NULL;
57 
/* Per-I/O driver context; its size is reported by bdev_iscsi_get_ctx_size(). */
struct bdev_iscsi_io {
	/* Submitting thread when it differs from the LUN's main thread, otherwise NULL. */
	struct spdk_thread *submit_td;
	/* LUN the I/O was submitted to. */
	struct bdev_iscsi_lun *lun;
	/* bdev-level status recorded by bdev_iscsi_io_complete(). */
	enum spdk_bdev_io_status status;
	/* SCSI status and sense data captured in bdev_iscsi_command_cb(). */
	int scsi_status;
	enum spdk_scsi_sense sk;
	uint8_t asc;
	uint8_t ascq;
};
67 
/* State of one iSCSI LUN exposed as an SPDK bdev; also used as the io_device handle. */
struct bdev_iscsi_lun {
	struct spdk_bdev		bdev;
	/* libiscsi context taken over from the connection request. */
	struct iscsi_context		*context;
	char				*initiator_iqn;
	int				lun_id;
	char				*url;
	/* Held around updates of ch_count and main_td. */
	pthread_mutex_t			mutex;
	/* Number of I/O channels currently open on this LUN. */
	uint32_t			ch_count;
	/* Thread that created the first channel; the I/O pollers run there. */
	struct spdk_thread		*main_td;
	/* Poller (and its owning thread) that services the context while ch_count is 0. */
	struct spdk_poller		*no_main_ch_poller;
	struct spdk_thread		*no_main_ch_poller_td;
	bool				unmap_supported;
	/* Largest number of blocks placed in one UNMAP descriptor. */
	uint32_t			max_unmap;
	/* Pollers registered on main_td while at least one channel exists. */
	struct spdk_poller		*poller;
	struct spdk_poller		*timeout_poller;
};
84 
/* Per-channel context allocated by the io_device framework for each LUN channel. */
struct bdev_iscsi_io_channel {
	struct bdev_iscsi_lun	*lun;
};
88 
/* An in-flight request to connect to a target and create a bdev from it. */
struct bdev_iscsi_conn_req {
	char					*url;
	char					*bdev_name;
	char					*initiator_iqn;
	struct iscsi_context			*context;
	/* Completion callback and its argument, invoked from complete_conn_req(). */
	spdk_bdev_iscsi_create_cb		create_cb;
	void					*create_cb_arg;
	/* Filled in by the inquiry callbacks before the LUN is created. */
	bool					unmap_supported;
	uint32_t				max_unmap;
	int					lun;
	/* -1 while pending; overwritten by complete_conn_req() with the final status. */
	int					status;
	TAILQ_ENTRY(bdev_iscsi_conn_req)	link;
};
102 
/* Module-wide options; read via bdev_iscsi_get_opts() and replaced via bdev_iscsi_set_opts(). */
static struct spdk_bdev_iscsi_opts g_opts = {
	.timeout_sec = BDEV_ISCSI_TIMEOUT_DEFAULT,
	.timeout_poller_period_us = BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DEFAULT,
};
107 
108 void
109 bdev_iscsi_get_opts(struct spdk_bdev_iscsi_opts *opts)
110 {
111 	*opts = g_opts;
112 }
113 
114 int
115 bdev_iscsi_set_opts(struct spdk_bdev_iscsi_opts *opts)
116 {
117 	/* make the poller period equal to timeout / 30 */
118 	opts->timeout_poller_period_us = (opts->timeout_sec * 1000000ULL) /
119 					 BDEV_ISCSI_TIMEOUT_POLL_PERIOD_DIVISOR;
120 
121 	g_opts = *opts;
122 
123 	return 0;
124 }
125 
126 static void
127 complete_conn_req(struct bdev_iscsi_conn_req *req, struct spdk_bdev *bdev,
128 		  int status)
129 {
130 	TAILQ_REMOVE(&g_iscsi_conn_req, req, link);
131 	req->create_cb(req->create_cb_arg, bdev, status);
132 
133 	/*
134 	 * we are still running in the context of iscsi_service()
135 	 * so do not tear down its data structures here
136 	 */
137 	req->status = status;
138 }
139 
140 static int
141 bdev_iscsi_get_ctx_size(void)
142 {
143 	return sizeof(struct bdev_iscsi_io);
144 }
145 
146 static void
147 _iscsi_free_lun(void *arg)
148 {
149 	struct bdev_iscsi_lun *lun = arg;
150 
151 	assert(lun != NULL);
152 	iscsi_destroy_context(lun->context);
153 	pthread_mutex_destroy(&lun->mutex);
154 	free(lun->bdev.name);
155 	free(lun->url);
156 	free(lun->initiator_iqn);
157 
158 	spdk_bdev_destruct_done(&lun->bdev, 0);
159 	free(lun);
160 }
161 
162 static void
163 _bdev_iscsi_conn_req_free(struct bdev_iscsi_conn_req *req)
164 {
165 	free(req->initiator_iqn);
166 	free(req->bdev_name);
167 	free(req->url);
168 	/* destroy will call iscsi_disconnect() implicitly if connected */
169 	iscsi_destroy_context(req->context);
170 	free(req);
171 }
172 
173 static void
174 bdev_iscsi_finish(void)
175 {
176 	struct bdev_iscsi_conn_req *req, *tmp;
177 
178 	/* clear out pending connection requests here. We cannot
179 	 * simply set the state to a non SCSI_STATUS_GOOD state as
180 	 * the connection poller won't run anymore
181 	 */
182 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
183 		_bdev_iscsi_conn_req_free(req);
184 	}
185 
186 	if (g_conn_poller) {
187 		spdk_poller_unregister(&g_conn_poller);
188 	}
189 }
190 
191 static void
192 bdev_iscsi_opts_config_json(struct spdk_json_write_ctx *w)
193 {
194 	spdk_json_write_object_begin(w);
195 
196 	spdk_json_write_named_string(w, "method", "bdev_iscsi_set_options");
197 
198 	spdk_json_write_named_object_begin(w, "params");
199 	spdk_json_write_named_uint64(w, "timeout_sec", g_opts.timeout_sec);
200 	spdk_json_write_object_end(w);
201 
202 	spdk_json_write_object_end(w);
203 }
204 
/* Module config_json hook: writes the module options and returns 0. */
static int
bdev_iscsi_config_json(struct spdk_json_write_ctx *w)
{
	int rc = 0;

	bdev_iscsi_opts_config_json(w);

	return rc;
}
211 
/* Module descriptor wiring the hooks defined in this file into the bdev layer. */
static struct spdk_bdev_module g_iscsi_bdev_module = {
	.name		= "iscsi",
	.module_init	= bdev_iscsi_initialize,
	.module_fini	= bdev_iscsi_finish,
	.config_json	= bdev_iscsi_config_json,
	.get_ctx_size	= bdev_iscsi_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(iscsi, &g_iscsi_bdev_module);
221 
222 static void
223 _bdev_iscsi_io_complete(void *_iscsi_io)
224 {
225 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
226 
227 	if (iscsi_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
228 		spdk_bdev_io_complete_scsi_status(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->scsi_status,
229 						  iscsi_io->sk, iscsi_io->asc, iscsi_io->ascq);
230 	} else {
231 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->status);
232 	}
233 }
234 
235 static void
236 bdev_iscsi_io_complete(struct bdev_iscsi_io *iscsi_io, enum spdk_bdev_io_status status)
237 {
238 	iscsi_io->status = status;
239 	if (iscsi_io->submit_td != NULL) {
240 		spdk_thread_send_msg(iscsi_io->submit_td, _bdev_iscsi_io_complete, iscsi_io);
241 	} else {
242 		_bdev_iscsi_io_complete(iscsi_io);
243 	}
244 }
245 
246 static bool
247 _bdev_iscsi_is_size_change(int status, struct scsi_task *task)
248 {
249 	if (status == SPDK_SCSI_STATUS_CHECK_CONDITION &&
250 	    (uint8_t)task->sense.key == SPDK_SCSI_SENSE_UNIT_ATTENTION &&
251 	    task->sense.ascq == 0x2a09) {
252 		/* ASCQ: SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED (0x2a09) */
253 		return true;
254 	}
255 
256 	return false;
257 }
258 
259 /* Common call back function for read/write/flush command */
260 static void
261 bdev_iscsi_command_cb(struct iscsi_context *context, int status, void *_task, void *_iscsi_io)
262 {
263 	struct scsi_task *task = _task;
264 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
265 	struct spdk_bdev_io *bdev_io;
266 
267 	iscsi_io->scsi_status = status;
268 	iscsi_io->sk = (uint8_t)task->sense.key;
269 	iscsi_io->asc = (task->sense.ascq >> 8) & 0xFF;
270 	iscsi_io->ascq = task->sense.ascq & 0xFF;
271 
272 	if (_bdev_iscsi_is_size_change(status, task)) {
273 		bdev_iscsi_readcapacity16(context, iscsi_io->lun);
274 
275 		/* Retry this failed IO immediately */
276 		bdev_io = spdk_bdev_io_from_ctx(iscsi_io);
277 		if (iscsi_io->submit_td != NULL) {
278 			spdk_thread_send_msg(iscsi_io->lun->main_td,
279 					     _bdev_iscsi_submit_request, bdev_io);
280 		} else {
281 			_bdev_iscsi_submit_request(bdev_io);
282 		}
283 	} else {
284 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
285 	}
286 
287 	scsi_free_scsi_task(task);
288 }
289 
290 static int
291 bdev_iscsi_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_block)
292 {
293 	int rc;
294 
295 	assert(bdev->module == &g_iscsi_bdev_module);
296 
297 	if (new_size_in_block <= bdev->blockcnt) {
298 		SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
299 		return -EINVAL;
300 	}
301 
302 	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_block);
303 	if (rc != 0) {
304 		SPDK_ERRLOG("failed to notify block cnt change.\n");
305 		return rc;
306 	}
307 
308 	return 0;
309 }
310 
311 static void
312 bdev_iscsi_readcapacity16_cb(struct iscsi_context *context, int status, void *_task,
313 			     void *private_data)
314 {
315 	struct bdev_iscsi_lun *lun = private_data;
316 	struct scsi_readcapacity16 *readcap16;
317 	struct scsi_task *task = _task;
318 	uint64_t size_in_block = 0;
319 	int rc;
320 
321 	if (status != SPDK_SCSI_STATUS_GOOD) {
322 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(context));
323 		goto ret;
324 	}
325 
326 	readcap16 = scsi_datain_unmarshall(task);
327 	if (!readcap16) {
328 		SPDK_ERRLOG("Read capacity error\n");
329 		goto ret;
330 	}
331 
332 	size_in_block = readcap16->returned_lba + 1;
333 
334 	rc = bdev_iscsi_resize(&lun->bdev, size_in_block);
335 	if (rc != 0) {
336 		SPDK_ERRLOG("Bdev (%s) resize error: %d\n", lun->bdev.name, rc);
337 	}
338 
339 ret:
340 	scsi_free_scsi_task(task);
341 }
342 
343 static void
344 bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun)
345 {
346 	struct scsi_task *task;
347 
348 	task = iscsi_readcapacity16_task(context, lun->lun_id,
349 					 bdev_iscsi_readcapacity16_cb, lun);
350 	if (task == NULL) {
351 		SPDK_ERRLOG("failed to get readcapacity16_task\n");
352 	}
353 }
354 
355 static void
356 bdev_iscsi_readv(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
357 		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
358 {
359 	struct scsi_task *task;
360 
361 	SPDK_DEBUGLOG(iscsi_init, "read %d iovs size %lu to lba: %#lx\n",
362 		      iovcnt, nbytes, lba);
363 
364 	task = iscsi_read16_task(lun->context, lun->lun_id, lba, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0,
365 				 bdev_iscsi_command_cb, iscsi_io);
366 	if (task == NULL) {
367 		SPDK_ERRLOG("failed to get read16_task\n");
368 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
369 		return;
370 	}
371 
372 #if defined(LIBISCSI_FEATURE_IOVECTOR)
373 	scsi_task_set_iov_in(task, (struct scsi_iovec *)iov, iovcnt);
374 #else
375 	int i;
376 	for (i = 0; i < iovcnt; i++) {
377 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
378 	}
379 #endif
380 }
381 
382 static void
383 bdev_iscsi_writev(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
384 		  struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
385 {
386 	struct scsi_task *task;
387 
388 	SPDK_DEBUGLOG(iscsi_init, "write %d iovs size %lu to lba: %#lx\n",
389 		      iovcnt, nbytes, lba);
390 
391 	task = iscsi_write16_task(lun->context, lun->lun_id, lba, NULL, nbytes, lun->bdev.blocklen, 0, 0, 0,
392 				  0, 0,
393 				  bdev_iscsi_command_cb, iscsi_io);
394 	if (task == NULL) {
395 		SPDK_ERRLOG("failed to get write16_task\n");
396 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
397 		return;
398 	}
399 
400 #if defined(LIBISCSI_FEATURE_IOVECTOR)
401 	scsi_task_set_iov_out(task, (struct scsi_iovec *)iov, iovcnt);
402 #else
403 	int i;
404 	for (i = 0; i < iovcnt; i++) {
405 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
406 	}
407 #endif
408 }
409 
410 static void
411 bdev_iscsi_destruct_cb(void *ctx)
412 {
413 	struct bdev_iscsi_lun *lun = ctx;
414 
415 	spdk_poller_unregister(&lun->no_main_ch_poller);
416 	spdk_io_device_unregister(lun, _iscsi_free_lun);
417 }
418 
419 static int
420 bdev_iscsi_destruct(void *ctx)
421 {
422 	struct bdev_iscsi_lun *lun = ctx;
423 
424 	assert(lun->no_main_ch_poller_td);
425 	spdk_thread_send_msg(lun->no_main_ch_poller_td, bdev_iscsi_destruct_cb, lun);
426 	return 1;
427 }
428 
429 static void
430 bdev_iscsi_flush(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, uint32_t num_blocks,
431 		 int immed, uint64_t lba)
432 {
433 	struct scsi_task *task;
434 
435 	task = iscsi_synchronizecache16_task(lun->context, lun->lun_id, lba,
436 					     num_blocks, 0, immed, bdev_iscsi_command_cb, iscsi_io);
437 	if (task == NULL) {
438 		SPDK_ERRLOG("failed to get sync16_task\n");
439 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
440 		return;
441 	}
442 }
443 
444 static void
445 bdev_iscsi_unmap(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
446 		 uint64_t lba, uint64_t num_blocks)
447 {
448 	struct scsi_task *task;
449 	struct unmap_list list[BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT] = {};
450 	struct unmap_list *entry;
451 	uint32_t num_unmap_list;
452 	uint64_t offset, remaining, unmap_blocks;
453 
454 	num_unmap_list = spdk_divide_round_up(num_blocks, lun->max_unmap);
455 	if (num_unmap_list > BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT) {
456 		SPDK_ERRLOG("Too many unmap entries\n");
457 		goto failed;
458 	}
459 
460 	remaining = num_blocks;
461 	offset = lba;
462 	num_unmap_list = 0;
463 	entry = &list[0];
464 
465 	do {
466 		unmap_blocks = spdk_min(remaining, lun->max_unmap);
467 		entry->lba = offset;
468 		entry->num = unmap_blocks;
469 		num_unmap_list++;
470 		remaining -= unmap_blocks;
471 		offset += unmap_blocks;
472 		entry++;
473 	} while (remaining > 0);
474 
475 	task = iscsi_unmap_task(lun->context, lun->lun_id, 0, 0, list, num_unmap_list,
476 				bdev_iscsi_command_cb, iscsi_io);
477 	if (task != NULL) {
478 		return;
479 	}
480 	SPDK_ERRLOG("failed to get unmap_task\n");
481 
482 failed:
483 	bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
484 }
485 
486 static void
487 bdev_iscsi_reset_cb(struct iscsi_context *context __attribute__((unused)), int status,
488 		    void *command_data, void *private_data)
489 {
490 	uint32_t tmf_response;
491 	struct bdev_iscsi_io *iscsi_io = private_data;
492 
493 	tmf_response = *(uint32_t *)command_data;
494 	if (tmf_response == ISCSI_TASK_FUNC_RESP_COMPLETE) {
495 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
496 	} else {
497 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
498 	}
499 }
500 
501 static void
502 _bdev_iscsi_reset(void *_bdev_io)
503 {
504 	int rc;
505 	struct spdk_bdev_io *bdev_io = _bdev_io;
506 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
507 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
508 	struct iscsi_context *context = lun->context;
509 
510 	rc = iscsi_task_mgmt_lun_reset_async(context, lun->lun_id,
511 					     bdev_iscsi_reset_cb, iscsi_io);
512 	if (rc != 0) {
513 		SPDK_ERRLOG("failed to do iscsi reset\n");
514 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
515 		return;
516 	}
517 }
518 
519 static void
520 bdev_iscsi_reset(struct spdk_bdev_io *bdev_io)
521 {
522 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
523 	spdk_thread_send_msg(lun->main_td, _bdev_iscsi_reset, bdev_io);
524 }
525 
526 static int
527 bdev_iscsi_poll_lun(void *_lun)
528 {
529 	struct bdev_iscsi_lun *lun = _lun;
530 	struct pollfd pfd = {};
531 
532 	pfd.fd = iscsi_get_fd(lun->context);
533 	pfd.events = iscsi_which_events(lun->context);
534 
535 	if (poll(&pfd, 1, 0) < 0) {
536 		SPDK_ERRLOG("poll failed\n");
537 		return SPDK_POLLER_IDLE;
538 	}
539 
540 	if (pfd.revents != 0) {
541 		if (iscsi_service(lun->context, pfd.revents) < 0) {
542 			SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(lun->context));
543 		}
544 
545 		return SPDK_POLLER_BUSY;
546 	}
547 
548 	return SPDK_POLLER_IDLE;
549 }
550 
551 static int
552 bdev_iscsi_poll_lun_timeout(void *_lun)
553 {
554 	struct bdev_iscsi_lun *lun = _lun;
555 	/* passing 0 here to iscsi_service means do nothing except for timeout checks */
556 	iscsi_service(lun->context, 0);
557 	return SPDK_POLLER_BUSY;
558 }
559 
560 static int
561 bdev_iscsi_no_main_ch_poll(void *arg)
562 {
563 	struct bdev_iscsi_lun *lun = arg;
564 	enum spdk_thread_poller_rc rc = SPDK_POLLER_IDLE;
565 
566 	if (pthread_mutex_trylock(&lun->mutex)) {
567 		/* Don't care about the error code here. */
568 		return SPDK_POLLER_IDLE;
569 	}
570 
571 	if (lun->ch_count == 0) {
572 		rc = bdev_iscsi_poll_lun(arg);
573 	}
574 
575 	pthread_mutex_unlock(&lun->mutex);
576 	return rc;
577 }
578 
579 static void
580 bdev_iscsi_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
581 		      bool success)
582 {
583 	if (!success) {
584 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
585 		return;
586 	}
587 
588 	bdev_iscsi_readv((struct bdev_iscsi_lun *)bdev_io->bdev->ctxt,
589 			 (struct bdev_iscsi_io *)bdev_io->driver_ctx,
590 			 bdev_io->u.bdev.iovs,
591 			 bdev_io->u.bdev.iovcnt,
592 			 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
593 			 bdev_io->u.bdev.offset_blocks);
594 }
595 
596 static void
597 _bdev_iscsi_submit_request(void *_bdev_io)
598 {
599 	struct spdk_bdev_io *bdev_io = _bdev_io;
600 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
601 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
602 
603 	switch (bdev_io->type) {
604 	case SPDK_BDEV_IO_TYPE_READ:
605 		spdk_bdev_io_get_buf(bdev_io, bdev_iscsi_get_buf_cb,
606 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
607 		break;
608 
609 	case SPDK_BDEV_IO_TYPE_WRITE:
610 		bdev_iscsi_writev(lun, iscsi_io,
611 				  bdev_io->u.bdev.iovs,
612 				  bdev_io->u.bdev.iovcnt,
613 				  bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
614 				  bdev_io->u.bdev.offset_blocks);
615 		break;
616 	case SPDK_BDEV_IO_TYPE_FLUSH:
617 		bdev_iscsi_flush(lun, iscsi_io,
618 				 bdev_io->u.bdev.num_blocks,
619 				 ISCSI_IMMEDIATE_DATA_NO,
620 				 bdev_io->u.bdev.offset_blocks);
621 		break;
622 	case SPDK_BDEV_IO_TYPE_RESET:
623 		bdev_iscsi_reset(bdev_io);
624 		break;
625 	case SPDK_BDEV_IO_TYPE_UNMAP:
626 		bdev_iscsi_unmap(lun, iscsi_io,
627 				 bdev_io->u.bdev.offset_blocks,
628 				 bdev_io->u.bdev.num_blocks);
629 		break;
630 	default:
631 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
632 		break;
633 	}
634 }
635 
636 static void
637 bdev_iscsi_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
638 {
639 	struct spdk_thread *submit_td = spdk_io_channel_get_thread(_ch);
640 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
641 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
642 
643 	iscsi_io->lun = lun;
644 
645 	if (lun->main_td != submit_td) {
646 		iscsi_io->submit_td = submit_td;
647 		spdk_thread_send_msg(lun->main_td, _bdev_iscsi_submit_request, bdev_io);
648 		return;
649 	} else {
650 		iscsi_io->submit_td = NULL;
651 	}
652 
653 	_bdev_iscsi_submit_request(bdev_io);
654 }
655 
656 static bool
657 bdev_iscsi_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
658 {
659 	struct bdev_iscsi_lun *lun = ctx;
660 
661 	switch (io_type) {
662 	case SPDK_BDEV_IO_TYPE_READ:
663 	case SPDK_BDEV_IO_TYPE_WRITE:
664 	case SPDK_BDEV_IO_TYPE_FLUSH:
665 	case SPDK_BDEV_IO_TYPE_RESET:
666 		return true;
667 
668 	case SPDK_BDEV_IO_TYPE_UNMAP:
669 		return lun->unmap_supported;
670 	default:
671 		return false;
672 	}
673 }
674 
675 static int
676 bdev_iscsi_create_cb(void *io_device, void *ctx_buf)
677 {
678 	struct bdev_iscsi_io_channel *ch = ctx_buf;
679 	struct bdev_iscsi_lun *lun = io_device;
680 
681 	pthread_mutex_lock(&lun->mutex);
682 	if (lun->ch_count == 0) {
683 		assert(lun->main_td == NULL);
684 		lun->main_td = spdk_get_thread();
685 		lun->poller = SPDK_POLLER_REGISTER(bdev_iscsi_poll_lun, lun, 0);
686 		if (g_opts.timeout_sec > 0) {
687 			lun->timeout_poller = SPDK_POLLER_REGISTER(bdev_iscsi_poll_lun_timeout, lun,
688 					      g_opts.timeout_poller_period_us);
689 		}
690 		ch->lun = lun;
691 	}
692 	lun->ch_count++;
693 	pthread_mutex_unlock(&lun->mutex);
694 
695 	return 0;
696 }
697 
698 static void
699 _iscsi_destroy_cb(void *ctx)
700 {
701 	struct bdev_iscsi_lun *lun = ctx;
702 
703 	pthread_mutex_lock(&lun->mutex);
704 
705 	assert(lun->main_td == spdk_get_thread());
706 	assert(lun->ch_count > 0);
707 
708 	lun->ch_count--;
709 	if (lun->ch_count > 0) {
710 		pthread_mutex_unlock(&lun->mutex);
711 		return;
712 	}
713 
714 	lun->main_td = NULL;
715 	spdk_poller_unregister(&lun->poller);
716 	spdk_poller_unregister(&lun->timeout_poller);
717 
718 	pthread_mutex_unlock(&lun->mutex);
719 }
720 
721 static void
722 bdev_iscsi_destroy_cb(void *io_device, void *ctx_buf)
723 {
724 	struct bdev_iscsi_lun *lun = io_device;
725 	struct spdk_thread *thread;
726 
727 	pthread_mutex_lock(&lun->mutex);
728 	lun->ch_count--;
729 	if (lun->ch_count == 0) {
730 		assert(lun->main_td != NULL);
731 
732 		if (lun->main_td != spdk_get_thread()) {
733 			/* The final channel was destroyed on a different thread
734 			 * than where the first channel was created. Pass a message
735 			 * to the main thread to unregister the poller. */
736 			lun->ch_count++;
737 			thread = lun->main_td;
738 			pthread_mutex_unlock(&lun->mutex);
739 			spdk_thread_send_msg(thread, _iscsi_destroy_cb, lun);
740 			return;
741 		}
742 
743 		lun->main_td = NULL;
744 		spdk_poller_unregister(&lun->poller);
745 		spdk_poller_unregister(&lun->timeout_poller);
746 	}
747 	pthread_mutex_unlock(&lun->mutex);
748 }
749 
/* bdev get_io_channel hook: the LUN pointer doubles as the io_device handle. */
static struct spdk_io_channel *
bdev_iscsi_get_io_channel(void *ctx)
{
	struct bdev_iscsi_lun *io_device = ctx;
	struct spdk_io_channel *channel = spdk_get_io_channel(io_device);

	return channel;
}
757 
758 static int
759 bdev_iscsi_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
760 {
761 	struct bdev_iscsi_lun *lun = ctx;
762 
763 	spdk_json_write_named_object_begin(w, "iscsi");
764 	spdk_json_write_named_string(w, "initiator_name", lun->initiator_iqn);
765 	spdk_json_write_named_string(w, "url", lun->url);
766 	spdk_json_write_object_end(w);
767 
768 	return 0;
769 }
770 
771 static void
772 bdev_iscsi_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
773 {
774 	struct bdev_iscsi_lun *lun = bdev->ctxt;
775 
776 	pthread_mutex_lock(&lun->mutex);
777 	spdk_json_write_object_begin(w);
778 
779 	spdk_json_write_named_string(w, "method", "bdev_iscsi_create");
780 
781 	spdk_json_write_named_object_begin(w, "params");
782 	spdk_json_write_named_string(w, "name", bdev->name);
783 	spdk_json_write_named_string(w, "initiator_iqn", lun->initiator_iqn);
784 	spdk_json_write_named_string(w, "url", lun->url);
785 	spdk_json_write_object_end(w);
786 
787 	spdk_json_write_object_end(w);
788 	pthread_mutex_unlock(&lun->mutex);
789 }
790 
/* bdev function table installed on every iSCSI LUN bdev in create_iscsi_lun(). */
static const struct spdk_bdev_fn_table iscsi_fn_table = {
	.destruct		= bdev_iscsi_destruct,
	.submit_request		= bdev_iscsi_submit_request,
	.io_type_supported	= bdev_iscsi_io_type_supported,
	.get_io_channel		= bdev_iscsi_get_io_channel,
	.dump_info_json		= bdev_iscsi_dump_info_json,
	.write_config_json	= bdev_iscsi_write_config_json,
};
799 
800 static int
801 create_iscsi_lun(struct bdev_iscsi_conn_req *req, uint64_t num_blocks,
802 		 uint32_t block_size, struct spdk_bdev **bdev, uint8_t lbppbe)
803 {
804 	struct bdev_iscsi_lun *lun;
805 	int rc;
806 
807 	lun = calloc(1, sizeof(*lun));
808 	if (!lun) {
809 		SPDK_ERRLOG("Unable to allocate enough memory for iscsi backend\n");
810 		return -ENOMEM;
811 	}
812 
813 	lun->context = req->context;
814 	lun->lun_id = req->lun;
815 	lun->url = req->url;
816 	lun->initiator_iqn = req->initiator_iqn;
817 
818 	pthread_mutex_init(&lun->mutex, NULL);
819 
820 	lun->bdev.name = req->bdev_name;
821 	lun->bdev.product_name = "iSCSI LUN";
822 	lun->bdev.module = &g_iscsi_bdev_module;
823 	lun->bdev.blocklen = block_size;
824 	lun->bdev.phys_blocklen = block_size * (1 << lbppbe);
825 	lun->bdev.blockcnt = num_blocks;
826 	lun->bdev.ctxt = lun;
827 	lun->unmap_supported = req->unmap_supported;
828 	if (lun->unmap_supported) {
829 		lun->max_unmap = req->max_unmap;
830 		lun->bdev.max_unmap = req->max_unmap;
831 		lun->bdev.max_unmap_segments = BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT;
832 	}
833 
834 	lun->bdev.fn_table = &iscsi_fn_table;
835 
836 	spdk_io_device_register(lun, bdev_iscsi_create_cb, bdev_iscsi_destroy_cb,
837 				sizeof(struct bdev_iscsi_io_channel),
838 				req->bdev_name);
839 	rc = spdk_bdev_register(&lun->bdev);
840 	if (rc) {
841 		spdk_io_device_unregister(lun, NULL);
842 		pthread_mutex_destroy(&lun->mutex);
843 		free(lun);
844 		return rc;
845 	}
846 
847 	lun->no_main_ch_poller_td = spdk_get_thread();
848 	lun->no_main_ch_poller = SPDK_POLLER_REGISTER(bdev_iscsi_no_main_ch_poll, lun,
849 				 BDEV_ISCSI_NO_MAIN_CH_POLL_US);
850 
851 	*bdev = &lun->bdev;
852 	return 0;
853 }
854 
855 static void
856 iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
857 			void *command_data, void *private_data)
858 {
859 	struct bdev_iscsi_conn_req *req = private_data;
860 	struct scsi_readcapacity16 *readcap16;
861 	struct spdk_bdev *bdev = NULL;
862 	struct scsi_task *task = command_data;
863 	struct scsi_task *retry_task = NULL;
864 
865 	if (status != SPDK_SCSI_STATUS_GOOD) {
866 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(iscsi));
867 		if (_bdev_iscsi_is_size_change(status, task)) {
868 			scsi_free_scsi_task(task);
869 			retry_task = iscsi_readcapacity16_task(iscsi, req->lun,
870 							       iscsi_readcapacity16_cb, req);
871 			if (retry_task) {
872 				return;
873 			}
874 		}
875 		goto ret;
876 	}
877 
878 	readcap16 = scsi_datain_unmarshall(task);
879 	if (!readcap16) {
880 		status = -ENOMEM;
881 		goto ret;
882 	}
883 
884 	status = create_iscsi_lun(req, readcap16->returned_lba + 1, readcap16->block_length, &bdev,
885 				  readcap16->lbppbe);
886 	if (status) {
887 		SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status);
888 	}
889 
890 ret:
891 	scsi_free_scsi_task(task);
892 	complete_conn_req(req, bdev, status);
893 }
894 
895 static void
896 bdev_iscsi_inquiry_bl_cb(struct iscsi_context *context, int status, void *_task, void *private_data)
897 {
898 	struct scsi_task *task = _task;
899 	struct scsi_inquiry_block_limits *bl_inq = NULL;
900 	struct bdev_iscsi_conn_req *req = private_data;
901 
902 	if (status == SPDK_SCSI_STATUS_GOOD) {
903 		bl_inq = scsi_datain_unmarshall(task);
904 		if (bl_inq != NULL) {
905 			if (!bl_inq->max_unmap) {
906 				SPDK_ERRLOG("Invalid max_unmap, use the default\n");
907 				req->max_unmap = BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT;
908 			} else {
909 				req->max_unmap = bl_inq->max_unmap;
910 			}
911 		}
912 	}
913 
914 	scsi_free_scsi_task(task);
915 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
916 	if (task) {
917 		return;
918 	}
919 
920 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
921 	complete_conn_req(req, NULL, status);
922 }
923 
924 static void
925 bdev_iscsi_inquiry_lbp_cb(struct iscsi_context *context, int status, void *_task,
926 			  void *private_data)
927 {
928 	struct scsi_task *task = _task;
929 	struct scsi_inquiry_logical_block_provisioning *lbp_inq = NULL;
930 	struct bdev_iscsi_conn_req *req = private_data;
931 
932 	if (status == SPDK_SCSI_STATUS_GOOD) {
933 		lbp_inq = scsi_datain_unmarshall(task);
934 		if (lbp_inq != NULL && lbp_inq->lbpu) {
935 			req->unmap_supported = true;
936 			scsi_free_scsi_task(task);
937 
938 			task = iscsi_inquiry_task(context, req->lun, 1,
939 						  SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
940 						  255, bdev_iscsi_inquiry_bl_cb, req);
941 			if (task) {
942 				return;
943 			}
944 		}
945 	} else {
946 		scsi_free_scsi_task(task);
947 	}
948 
949 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
950 	if (task) {
951 		return;
952 	}
953 
954 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
955 	complete_conn_req(req, NULL, status);
956 }
957 
958 static void
959 iscsi_connect_cb(struct iscsi_context *iscsi, int status,
960 		 void *command_data, void *private_data)
961 {
962 	struct bdev_iscsi_conn_req *req = private_data;
963 	struct scsi_task *task;
964 
965 	if (status != SPDK_SCSI_STATUS_GOOD) {
966 		goto ret;
967 	}
968 
969 	task = iscsi_inquiry_task(iscsi, req->lun, 1,
970 				  SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
971 				  255, bdev_iscsi_inquiry_lbp_cb, req);
972 	if (task) {
973 		return;
974 	}
975 
976 ret:
977 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
978 	complete_conn_req(req, NULL, status);
979 }
980 
981 static int
982 iscsi_bdev_conn_poll(void *arg)
983 {
984 	struct bdev_iscsi_conn_req *req, *tmp;
985 	struct pollfd pfd;
986 	struct iscsi_context *context;
987 
988 	if (TAILQ_EMPTY(&g_iscsi_conn_req)) {
989 		spdk_poller_unregister(&g_conn_poller);
990 		return SPDK_POLLER_IDLE;
991 	}
992 
993 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
994 		context = req->context;
995 		pfd.fd = iscsi_get_fd(context);
996 		pfd.events = iscsi_which_events(context);
997 		pfd.revents = 0;
998 		if (poll(&pfd, 1, 0) < 0) {
999 			SPDK_ERRLOG("poll failed\n");
1000 			return SPDK_POLLER_BUSY;
1001 		}
1002 
1003 		if (pfd.revents != 0) {
1004 			if (iscsi_service(context, pfd.revents) < 0) {
1005 				SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(context));
1006 			}
1007 		}
1008 
1009 		if (req->status == 0) {
1010 			/*
1011 			 * The request completed successfully.
1012 			 */
1013 			free(req);
1014 		} else if (req->status > 0) {
1015 			/*
1016 			 * An error has occurred during connecting.  This req has already
1017 			 * been removed from the g_iscsi_conn_req list, but we needed to
1018 			 * wait until iscsi_service unwound before we could free the req.
1019 			 */
1020 			_bdev_iscsi_conn_req_free(req);
1021 		}
1022 	}
1023 	return SPDK_POLLER_BUSY;
1024 }
1025 
/*
 * Start creating an iSCSI bdev.
 *
 * bdev_name      name for the new bdev
 * url            iSCSI URL, parsed with iscsi_parse_full_url()
 * initiator_iqn  initiator name; must be non-empty
 * cb_fn/cb_arg   completion callback invoked through complete_conn_req()
 *                once the connection sequence finishes
 *
 * Returns 0 when the asynchronous connect was started, -EINVAL for bad
 * arguments or an unparsable URL, -ENOMEM on allocation failure, or the
 * error code returned by the libiscsi setup calls.
 */
int
create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
		  spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg)
{
	struct bdev_iscsi_conn_req *req;
	struct iscsi_url *iscsi_url = NULL;
	int rc;

	if (!bdev_name || !url || !initiator_iqn || strlen(initiator_iqn) == 0 || !cb_fn) {
		return -EINVAL;
	}

	req = calloc(1, sizeof(struct bdev_iscsi_conn_req));
	if (!req) {
		SPDK_ERRLOG("Cannot allocate pointer of struct bdev_iscsi_conn_req\n");
		return -ENOMEM;
	}

	req->status = SCSI_STATUS_GOOD;
	/* The request keeps private copies of all strings. */
	req->bdev_name = strdup(bdev_name);
	req->url = strdup(url);
	req->initiator_iqn = strdup(initiator_iqn);
	req->context = iscsi_create_context(initiator_iqn);
	if (!req->bdev_name || !req->url || !req->initiator_iqn || !req->context) {
		SPDK_ERRLOG("Out of memory\n");
		rc = -ENOMEM;
		goto err;
	}

	req->create_cb = cb_fn;
	req->create_cb_arg = cb_arg;

	iscsi_url = iscsi_parse_full_url(req->context, url);
	if (iscsi_url == NULL) {
		SPDK_ERRLOG("could not parse URL: %s\n", iscsi_get_error(req->context));
		rc = -EINVAL;
		goto err;
	}

	req->lun = iscsi_url->lun;
	/* Each step runs only if all previous ones returned 0. */
	rc = iscsi_set_session_type(req->context, ISCSI_SESSION_NORMAL);
	rc = rc ? rc : iscsi_set_header_digest(req->context, ISCSI_HEADER_DIGEST_NONE);
	rc = rc ? rc : iscsi_set_targetname(req->context, iscsi_url->target);
	rc = rc ? rc : iscsi_set_timeout(req->context, g_opts.timeout_sec);
	rc = rc ? rc : iscsi_full_connect_async(req->context, iscsi_url->portal, iscsi_url->lun,
						iscsi_connect_cb, req);
	/* Credentials are applied only when the URL carries a user name. */
	if (rc == 0 && iscsi_url->user[0] != '\0') {
		rc = iscsi_set_initiator_username_pwd(req->context, iscsi_url->user, iscsi_url->passwd);
	}

	if (rc < 0) {
		SPDK_ERRLOG("Failed to connect provided URL=%s: %s\n", url, iscsi_get_error(req->context));
		goto err;
	}

	iscsi_destroy_url(iscsi_url);
	/* -1 marks the request as still pending for iscsi_bdev_conn_poll(). */
	req->status = -1;
	TAILQ_INSERT_TAIL(&g_iscsi_conn_req, req, link);
	/* Start the connection poller on demand; it stops itself when the list drains. */
	if (!g_conn_poller) {
		g_conn_poller = SPDK_POLLER_REGISTER(iscsi_bdev_conn_poll, NULL, BDEV_ISCSI_CONNECTION_POLL_US);
	}

	return 0;

err:
	/* iscsi_destroy_url() is not NULL-proof */
	if (iscsi_url) {
		iscsi_destroy_url(iscsi_url);
	}

	if (req->context) {
		iscsi_destroy_context(req->context);
	}

	free(req->initiator_iqn);
	free(req->bdev_name);
	free(req->url);
	free(req);
	return rc;
}
1106 
1107 void
1108 delete_iscsi_disk(const char *bdev_name, spdk_delete_iscsi_complete cb_fn, void *cb_arg)
1109 {
1110 	int rc;
1111 
1112 	rc = spdk_bdev_unregister_by_name(bdev_name, &g_iscsi_bdev_module, cb_fn, cb_arg);
1113 	if (rc != 0) {
1114 		cb_fn(cb_arg, rc);
1115 	}
1116 }
1117 
/* Module init hook: there is nothing to set up, so it always returns 0. */
static int
bdev_iscsi_initialize(void)
{
	const int rc = 0;

	return rc;
}
1123 
/* Registers the "iscsi_init" log component used by the SPDK_DEBUGLOG() calls in this file. */
SPDK_LOG_REGISTER_COMPONENT(iscsi_init)
1125