xref: /spdk/module/bdev/iscsi/bdev_iscsi.c (revision 7506a7aa53d239f533af3bc768f0d2af55e735fe)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "spdk/bdev.h"
37 #include "spdk/env.h"
38 #include "spdk/fd.h"
39 #include "spdk/thread.h"
40 #include "spdk/json.h"
41 #include "spdk/util.h"
42 #include "spdk/rpc.h"
43 #include "spdk/string.h"
44 #include "spdk/iscsi_spec.h"
45 
46 #include "spdk/log.h"
47 #include "spdk/bdev_module.h"
48 
49 #include "iscsi/iscsi.h"
50 #include "iscsi/scsi-lowlevel.h"
51 
52 #include "bdev_iscsi.h"
53 
struct bdev_iscsi_lun;

/* Period of the poller that services pending connection requests. */
#define BDEV_ISCSI_CONNECTION_POLL_US 500 /* 0.5 ms */
/* Period of the per-LUN poller that services the connection while no I/O channel exists. */
#define BDEV_ISCSI_NO_MAIN_CH_POLL_US 10000 /* 10ms */

#define DEFAULT_INITIATOR_NAME "iqn.2016-06.io.spdk:init"

/* MAXIMUM UNMAP LBA COUNT:
 * indicates the maximum number of LBAs that may be unmapped
 * by an UNMAP command.
 */
#define BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT (32768)

/* MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT:
 * indicates the maximum number of UNMAP block descriptors that
 * shall be contained in the parameter data transferred to the
 * device server for an UNMAP command.
 */
#define BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT (1)

static int bdev_iscsi_initialize(void);
static void bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun);
static void _bdev_iscsi_submit_request(void *_bdev_io);

/* Connection requests that have been started but not yet completed. */
static TAILQ_HEAD(, bdev_iscsi_conn_req) g_iscsi_conn_req = TAILQ_HEAD_INITIALIZER(
			g_iscsi_conn_req);
/* Poller driving the requests on g_iscsi_conn_req; NULL while not registered. */
static struct spdk_poller *g_conn_poller = NULL;
81 
/* Per-I/O driver context; its size is reported through bdev_iscsi_get_ctx_size(). */
struct bdev_iscsi_io {
	/* Thread the I/O was submitted on when it differs from the LUN's main thread, else NULL. */
	struct spdk_thread *submit_td;
	/* LUN this I/O targets. */
	struct bdev_iscsi_lun *lun;
	/* Final bdev status used when the I/O is completed. */
	enum spdk_bdev_io_status status;
	/* SCSI status reported to the command callback. */
	int scsi_status;
	/* Sense key, additional sense code and qualifier captured from the task. */
	enum spdk_scsi_sense sk;
	uint8_t asc;
	uint8_t ascq;
};
91 
/* State of one iSCSI LUN exposed as a bdev; also used as the io_device handle. */
struct bdev_iscsi_lun {
	struct spdk_bdev		bdev;
	/* libiscsi connection context, destroyed together with the LUN. */
	struct iscsi_context		*context;
	/* Heap-allocated initiator name, owned by the LUN. */
	char				*initiator_iqn;
	int				lun_id;
	/* Heap-allocated target URL, owned by the LUN. */
	char				*url;
	/* Taken around accesses to ch_count, main_td and poller in the channel callbacks. */
	pthread_mutex_t			mutex;
	/* Number of I/O channels currently open on this LUN. */
	uint32_t			ch_count;
	/* Thread that created the first channel; commands are issued from it. */
	struct spdk_thread		*main_td;
	/* Poller that services the connection while ch_count is 0. */
	struct spdk_poller		*no_main_ch_poller;
	/* Thread that registered no_main_ch_poller. */
	struct spdk_thread		*no_main_ch_poller_td;
	bool				unmap_supported;
	/* Maximum number of blocks placed in one UNMAP descriptor. */
	uint32_t			max_unmap;
	/* Poller registered on main_td while at least one channel exists. */
	struct spdk_poller		*poller;
};
107 
/* Per-channel context allocated by the io_device framework for each I/O channel. */
struct bdev_iscsi_io_channel {
	/* LUN the channel belongs to. */
	struct bdev_iscsi_lun	*lun;
};
111 
/* An in-flight request to connect to a target and create a bdev from one of its LUNs. */
struct bdev_iscsi_conn_req {
	/* Heap copies of the caller's strings; handed over to the LUN once it is created. */
	char					*url;
	char					*bdev_name;
	char					*initiator_iqn;
	/* libiscsi context used for the connection; handed over to the LUN once it is created. */
	struct iscsi_context			*context;
	/* Completion callback and its argument, supplied by the caller of create_iscsi_disk(). */
	spdk_bdev_iscsi_create_cb		create_cb;
	void					*create_cb_arg;
	/* Results of the logical block provisioning and block limits inquiries. */
	bool					unmap_supported;
	uint32_t				max_unmap;
	/* LUN number parsed from the URL. */
	int					lun;
	/* -1 while the request is pending; set by complete_conn_req() when it finishes. */
	int					status;
	TAILQ_ENTRY(bdev_iscsi_conn_req)	link;
};
125 
126 static void
127 complete_conn_req(struct bdev_iscsi_conn_req *req, struct spdk_bdev *bdev,
128 		  int status)
129 {
130 	TAILQ_REMOVE(&g_iscsi_conn_req, req, link);
131 	req->create_cb(req->create_cb_arg, bdev, status);
132 
133 	/*
134 	 * we are still running in the context of iscsi_service()
135 	 * so do not tear down its data structures here
136 	 */
137 	req->status = status;
138 }
139 
140 static int
141 bdev_iscsi_get_ctx_size(void)
142 {
143 	return sizeof(struct bdev_iscsi_io);
144 }
145 
146 static void
147 _iscsi_free_lun(void *arg)
148 {
149 	struct bdev_iscsi_lun *lun = arg;
150 
151 	assert(lun != NULL);
152 	iscsi_destroy_context(lun->context);
153 	pthread_mutex_destroy(&lun->mutex);
154 	free(lun->bdev.name);
155 	free(lun->url);
156 	free(lun->initiator_iqn);
157 
158 	spdk_bdev_destruct_done(&lun->bdev, 0);
159 	free(lun);
160 }
161 
162 static void
163 _bdev_iscsi_conn_req_free(struct bdev_iscsi_conn_req *req)
164 {
165 	free(req->initiator_iqn);
166 	free(req->bdev_name);
167 	free(req->url);
168 	/* destroy will call iscsi_disconnect() implicitly if connected */
169 	iscsi_destroy_context(req->context);
170 	free(req);
171 }
172 
173 static void
174 bdev_iscsi_finish(void)
175 {
176 	struct bdev_iscsi_conn_req *req, *tmp;
177 
178 	/* clear out pending connection requests here. We cannot
179 	 * simply set the state to a non SCSI_STATUS_GOOD state as
180 	 * the connection poller wont run anymore
181 	 */
182 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
183 		_bdev_iscsi_conn_req_free(req);
184 	}
185 
186 	if (g_conn_poller) {
187 		spdk_poller_unregister(&g_conn_poller);
188 	}
189 }
190 
/* Module descriptor: wires this file's init, fini and context-size hooks into the bdev layer. */
static struct spdk_bdev_module g_iscsi_bdev_module = {
	.name		= "iscsi",
	.module_init	= bdev_iscsi_initialize,
	.module_fini	= bdev_iscsi_finish,
	.get_ctx_size	= bdev_iscsi_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(iscsi, &g_iscsi_bdev_module);
199 
200 static void
201 _bdev_iscsi_io_complete(void *_iscsi_io)
202 {
203 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
204 
205 	if (iscsi_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
206 		spdk_bdev_io_complete_scsi_status(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->scsi_status,
207 						  iscsi_io->sk, iscsi_io->asc, iscsi_io->ascq);
208 	} else {
209 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->status);
210 	}
211 }
212 
213 static void
214 bdev_iscsi_io_complete(struct bdev_iscsi_io *iscsi_io, enum spdk_bdev_io_status status)
215 {
216 	iscsi_io->status = status;
217 	if (iscsi_io->submit_td != NULL) {
218 		spdk_thread_send_msg(iscsi_io->submit_td, _bdev_iscsi_io_complete, iscsi_io);
219 	} else {
220 		_bdev_iscsi_io_complete(iscsi_io);
221 	}
222 }
223 
224 static bool
225 _bdev_iscsi_is_size_change(int status, struct scsi_task *task)
226 {
227 	if (status == SPDK_SCSI_STATUS_CHECK_CONDITION &&
228 	    (uint8_t)task->sense.key == SPDK_SCSI_SENSE_UNIT_ATTENTION &&
229 	    task->sense.ascq == 0x2a09) {
230 		/* ASCQ: SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED (0x2a09) */
231 		return true;
232 	}
233 
234 	return false;
235 }
236 
237 /* Common call back function for read/write/flush command */
238 static void
239 bdev_iscsi_command_cb(struct iscsi_context *context, int status, void *_task, void *_iscsi_io)
240 {
241 	struct scsi_task *task = _task;
242 	struct bdev_iscsi_io *iscsi_io = _iscsi_io;
243 	struct spdk_bdev_io *bdev_io;
244 
245 	iscsi_io->scsi_status = status;
246 	iscsi_io->sk = (uint8_t)task->sense.key;
247 	iscsi_io->asc = (task->sense.ascq >> 8) & 0xFF;
248 	iscsi_io->ascq = task->sense.ascq & 0xFF;
249 
250 	scsi_free_scsi_task(task);
251 
252 	if (_bdev_iscsi_is_size_change(status, task)) {
253 		bdev_iscsi_readcapacity16(context, iscsi_io->lun);
254 
255 		/* Retry this failed IO immediately */
256 		bdev_io = spdk_bdev_io_from_ctx(iscsi_io);
257 		if (iscsi_io->submit_td != NULL) {
258 			spdk_thread_send_msg(iscsi_io->lun->main_td,
259 					     _bdev_iscsi_submit_request, bdev_io);
260 		} else {
261 			_bdev_iscsi_submit_request(bdev_io);
262 		}
263 	} else {
264 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
265 	}
266 }
267 
268 static int
269 bdev_iscsi_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_block)
270 {
271 	int rc;
272 
273 	assert(bdev->module == &g_iscsi_bdev_module);
274 
275 	if (new_size_in_block <= bdev->blockcnt) {
276 		SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
277 		return -EINVAL;
278 	}
279 
280 	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_block);
281 	if (rc != 0) {
282 		SPDK_ERRLOG("failed to notify block cnt change.\n");
283 		return rc;
284 	}
285 
286 	return 0;
287 }
288 
289 static void
290 bdev_iscsi_readcapacity16_cb(struct iscsi_context *context, int status, void *_task,
291 			     void *private_data)
292 {
293 	struct bdev_iscsi_lun *lun = private_data;
294 	struct scsi_readcapacity16 *readcap16;
295 	struct scsi_task *task = _task;
296 	uint64_t size_in_block = 0;
297 	int rc;
298 
299 	if (status != SPDK_SCSI_STATUS_GOOD) {
300 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(context));
301 		goto ret;
302 	}
303 
304 	readcap16 = scsi_datain_unmarshall(task);
305 	if (!readcap16) {
306 		SPDK_ERRLOG("Read capacity error\n");
307 		goto ret;
308 	}
309 
310 	size_in_block = readcap16->returned_lba + 1;
311 
312 	rc = bdev_iscsi_resize(&lun->bdev, size_in_block);
313 	if (rc != 0) {
314 		SPDK_ERRLOG("Bdev (%s) resize error: %d\n", lun->bdev.name, rc);
315 	}
316 
317 ret:
318 	scsi_free_scsi_task(task);
319 }
320 
321 static void
322 bdev_iscsi_readcapacity16(struct iscsi_context *context, struct bdev_iscsi_lun *lun)
323 {
324 	struct scsi_task *task;
325 
326 	task = iscsi_readcapacity16_task(context, lun->lun_id,
327 					 bdev_iscsi_readcapacity16_cb, lun);
328 	if (task == NULL) {
329 		SPDK_ERRLOG("failed to get readcapacity16_task\n");
330 	}
331 }
332 
333 static void
334 bdev_iscsi_readv(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
335 		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
336 {
337 	struct scsi_task *task;
338 
339 	SPDK_DEBUGLOG(iscsi_init, "read %d iovs size %lu to lba: %#lx\n",
340 		      iovcnt, nbytes, lba);
341 
342 	task = iscsi_read16_task(lun->context, lun->lun_id, lba, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0,
343 				 bdev_iscsi_command_cb, iscsi_io);
344 	if (task == NULL) {
345 		SPDK_ERRLOG("failed to get read16_task\n");
346 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
347 		return;
348 	}
349 
350 #if defined(LIBISCSI_FEATURE_IOVECTOR)
351 	scsi_task_set_iov_in(task, (struct scsi_iovec *)iov, iovcnt);
352 #else
353 	int i;
354 	for (i = 0; i < iovcnt; i++) {
355 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
356 	}
357 #endif
358 }
359 
360 static void
361 bdev_iscsi_writev(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
362 		  struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
363 {
364 	struct scsi_task *task;
365 
366 	SPDK_DEBUGLOG(iscsi_init, "write %d iovs size %lu to lba: %#lx\n",
367 		      iovcnt, nbytes, lba);
368 
369 	task = iscsi_write16_task(lun->context, lun->lun_id, lba, NULL, nbytes, lun->bdev.blocklen, 0, 0, 0,
370 				  0, 0,
371 				  bdev_iscsi_command_cb, iscsi_io);
372 	if (task == NULL) {
373 		SPDK_ERRLOG("failed to get write16_task\n");
374 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
375 		return;
376 	}
377 
378 #if defined(LIBISCSI_FEATURE_IOVECTOR)
379 	scsi_task_set_iov_out(task, (struct scsi_iovec *)iov, iovcnt);
380 #else
381 	int i;
382 	for (i = 0; i < iovcnt; i++) {
383 		scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
384 	}
385 #endif
386 }
387 
388 static void
389 bdev_iscsi_destruct_cb(void *ctx)
390 {
391 	struct bdev_iscsi_lun *lun = ctx;
392 
393 	spdk_poller_unregister(&lun->no_main_ch_poller);
394 	spdk_io_device_unregister(lun, _iscsi_free_lun);
395 }
396 
397 static int
398 bdev_iscsi_destruct(void *ctx)
399 {
400 	struct bdev_iscsi_lun *lun = ctx;
401 
402 	assert(lun->no_main_ch_poller_td);
403 	spdk_thread_send_msg(lun->no_main_ch_poller_td, bdev_iscsi_destruct_cb, lun);
404 	return 1;
405 }
406 
407 static void
408 bdev_iscsi_flush(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, uint32_t num_blocks,
409 		 int immed, uint64_t lba)
410 {
411 	struct scsi_task *task;
412 
413 	task = iscsi_synchronizecache16_task(lun->context, lun->lun_id, lba,
414 					     num_blocks, 0, immed, bdev_iscsi_command_cb, iscsi_io);
415 	if (task == NULL) {
416 		SPDK_ERRLOG("failed to get sync16_task\n");
417 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
418 		return;
419 	}
420 }
421 
422 static void
423 bdev_iscsi_unmap(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
424 		 uint64_t lba, uint64_t num_blocks)
425 {
426 	struct scsi_task *task;
427 	struct unmap_list list[BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT] = {};
428 	struct unmap_list *entry;
429 	uint32_t num_unmap_list;
430 	uint64_t offset, remaining, unmap_blocks;
431 
432 	num_unmap_list = spdk_divide_round_up(num_blocks, lun->max_unmap);
433 	if (num_unmap_list > BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT) {
434 		SPDK_ERRLOG("Too many unmap entries\n");
435 		goto failed;
436 	}
437 
438 	remaining = num_blocks;
439 	offset = lba;
440 	num_unmap_list = 0;
441 	entry = &list[0];
442 
443 	do {
444 		unmap_blocks = spdk_min(remaining, lun->max_unmap);
445 		entry->lba = offset;
446 		entry->num = unmap_blocks;
447 		num_unmap_list++;
448 		remaining -= unmap_blocks;
449 		offset += unmap_blocks;
450 		entry++;
451 	} while (remaining > 0);
452 
453 	task = iscsi_unmap_task(lun->context, 0, 0, 0, list, num_unmap_list,
454 				bdev_iscsi_command_cb, iscsi_io);
455 	if (task != NULL) {
456 		return;
457 	}
458 	SPDK_ERRLOG("failed to get unmap_task\n");
459 
460 failed:
461 	bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
462 }
463 
464 static void
465 bdev_iscsi_reset_cb(struct iscsi_context *context __attribute__((unused)), int status,
466 		    void *command_data, void *private_data)
467 {
468 	uint32_t tmf_response;
469 	struct bdev_iscsi_io *iscsi_io = private_data;
470 
471 	tmf_response = *(uint32_t *)command_data;
472 	if (tmf_response == ISCSI_TASK_FUNC_RESP_COMPLETE) {
473 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
474 	} else {
475 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
476 	}
477 }
478 
479 static void
480 _bdev_iscsi_reset(void *_bdev_io)
481 {
482 	int rc;
483 	struct spdk_bdev_io *bdev_io = _bdev_io;
484 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
485 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
486 	struct iscsi_context *context = lun->context;
487 
488 	rc = iscsi_task_mgmt_lun_reset_async(context, lun->lun_id,
489 					     bdev_iscsi_reset_cb, iscsi_io);
490 	if (rc != 0) {
491 		SPDK_ERRLOG("failed to do iscsi reset\n");
492 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
493 		return;
494 	}
495 }
496 
497 static void
498 bdev_iscsi_reset(struct spdk_bdev_io *bdev_io)
499 {
500 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
501 	spdk_thread_send_msg(lun->main_td, _bdev_iscsi_reset, bdev_io);
502 }
503 
504 static int
505 bdev_iscsi_poll_lun(void *_lun)
506 {
507 	struct bdev_iscsi_lun *lun = _lun;
508 	struct pollfd pfd = {};
509 
510 	pfd.fd = iscsi_get_fd(lun->context);
511 	pfd.events = iscsi_which_events(lun->context);
512 
513 	if (poll(&pfd, 1, 0) < 0) {
514 		SPDK_ERRLOG("poll failed\n");
515 		return SPDK_POLLER_IDLE;
516 	}
517 
518 	if (pfd.revents != 0) {
519 		if (iscsi_service(lun->context, pfd.revents) < 0) {
520 			SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(lun->context));
521 		}
522 
523 		return SPDK_POLLER_BUSY;
524 	}
525 
526 	return SPDK_POLLER_IDLE;
527 }
528 
529 static int
530 bdev_iscsi_no_main_ch_poll(void *arg)
531 {
532 	struct bdev_iscsi_lun *lun = arg;
533 	enum spdk_thread_poller_rc rc = SPDK_POLLER_IDLE;
534 
535 	if (pthread_mutex_trylock(&lun->mutex)) {
536 		/* Don't care about the error code here. */
537 		return SPDK_POLLER_IDLE;
538 	}
539 
540 	if (lun->ch_count == 0) {
541 		rc = bdev_iscsi_poll_lun(arg);
542 	}
543 
544 	pthread_mutex_unlock(&lun->mutex);
545 	return rc;
546 }
547 
548 static void
549 bdev_iscsi_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
550 		      bool success)
551 {
552 	if (!success) {
553 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
554 		return;
555 	}
556 
557 	bdev_iscsi_readv((struct bdev_iscsi_lun *)bdev_io->bdev->ctxt,
558 			 (struct bdev_iscsi_io *)bdev_io->driver_ctx,
559 			 bdev_io->u.bdev.iovs,
560 			 bdev_io->u.bdev.iovcnt,
561 			 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
562 			 bdev_io->u.bdev.offset_blocks);
563 }
564 
565 static void _bdev_iscsi_submit_request(void *_bdev_io)
566 {
567 	struct spdk_bdev_io *bdev_io = _bdev_io;
568 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
569 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
570 
571 	switch (bdev_io->type) {
572 	case SPDK_BDEV_IO_TYPE_READ:
573 		spdk_bdev_io_get_buf(bdev_io, bdev_iscsi_get_buf_cb,
574 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
575 		break;
576 
577 	case SPDK_BDEV_IO_TYPE_WRITE:
578 		bdev_iscsi_writev(lun, iscsi_io,
579 				  bdev_io->u.bdev.iovs,
580 				  bdev_io->u.bdev.iovcnt,
581 				  bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
582 				  bdev_io->u.bdev.offset_blocks);
583 		break;
584 	case SPDK_BDEV_IO_TYPE_FLUSH:
585 		bdev_iscsi_flush(lun, iscsi_io,
586 				 bdev_io->u.bdev.num_blocks,
587 				 ISCSI_IMMEDIATE_DATA_NO,
588 				 bdev_io->u.bdev.offset_blocks);
589 		break;
590 	case SPDK_BDEV_IO_TYPE_RESET:
591 		bdev_iscsi_reset(bdev_io);
592 		break;
593 	case SPDK_BDEV_IO_TYPE_UNMAP:
594 		bdev_iscsi_unmap(lun, iscsi_io,
595 				 bdev_io->u.bdev.offset_blocks,
596 				 bdev_io->u.bdev.num_blocks);
597 		break;
598 	default:
599 		bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
600 		break;
601 	}
602 }
603 
604 static void bdev_iscsi_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
605 {
606 	struct spdk_thread *submit_td = spdk_io_channel_get_thread(_ch);
607 	struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
608 	struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
609 
610 	iscsi_io->lun = lun;
611 
612 	if (lun->main_td != submit_td) {
613 		iscsi_io->submit_td = submit_td;
614 		spdk_thread_send_msg(lun->main_td, _bdev_iscsi_submit_request, bdev_io);
615 		return;
616 	} else {
617 		iscsi_io->submit_td = NULL;
618 	}
619 
620 	_bdev_iscsi_submit_request(bdev_io);
621 }
622 
623 static bool
624 bdev_iscsi_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
625 {
626 	struct bdev_iscsi_lun *lun = ctx;
627 
628 	switch (io_type) {
629 	case SPDK_BDEV_IO_TYPE_READ:
630 	case SPDK_BDEV_IO_TYPE_WRITE:
631 	case SPDK_BDEV_IO_TYPE_FLUSH:
632 	case SPDK_BDEV_IO_TYPE_RESET:
633 		return true;
634 
635 	case SPDK_BDEV_IO_TYPE_UNMAP:
636 		return lun->unmap_supported;
637 	default:
638 		return false;
639 	}
640 }
641 
642 static int
643 bdev_iscsi_create_cb(void *io_device, void *ctx_buf)
644 {
645 	struct bdev_iscsi_io_channel *ch = ctx_buf;
646 	struct bdev_iscsi_lun *lun = io_device;
647 
648 	pthread_mutex_lock(&lun->mutex);
649 	if (lun->ch_count == 0) {
650 		assert(lun->main_td == NULL);
651 		lun->main_td = spdk_get_thread();
652 		lun->poller = SPDK_POLLER_REGISTER(bdev_iscsi_poll_lun, lun, 0);
653 		ch->lun = lun;
654 	}
655 	lun->ch_count++;
656 	pthread_mutex_unlock(&lun->mutex);
657 
658 	return 0;
659 }
660 
661 static void
662 _iscsi_destroy_cb(void *ctx)
663 {
664 	struct bdev_iscsi_lun *lun = ctx;
665 
666 	pthread_mutex_lock(&lun->mutex);
667 
668 	assert(lun->main_td == spdk_get_thread());
669 	assert(lun->ch_count > 0);
670 
671 	lun->ch_count--;
672 	if (lun->ch_count > 0) {
673 		pthread_mutex_unlock(&lun->mutex);
674 		return;
675 	}
676 
677 	lun->main_td = NULL;
678 	spdk_poller_unregister(&lun->poller);
679 
680 	pthread_mutex_unlock(&lun->mutex);
681 }
682 
/*
 * I/O channel destroy callback.
 *
 * Drops one channel reference.  When the last channel goes away the main
 * thread's poller has to be unregistered on the main thread itself, so the
 * work is either done inline or handed to the main thread by message.
 */
static void
bdev_iscsi_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_iscsi_lun *lun = io_device;
	struct spdk_thread *thread;

	pthread_mutex_lock(&lun->mutex);
	lun->ch_count--;
	if (lun->ch_count == 0) {
		assert(lun->main_td != NULL);

		if (lun->main_td != spdk_get_thread()) {
			/* The final channel was destroyed on a different thread
			 * than where the first channel was created. Pass a message
			 * to the main thread to unregister the poller. */
			/* Take the reference back; _iscsi_destroy_cb() drops it again. */
			lun->ch_count++;
			thread = lun->main_td;
			/* Release the lock before sending the message. */
			pthread_mutex_unlock(&lun->mutex);
			spdk_thread_send_msg(thread, _iscsi_destroy_cb, lun);
			return;
		}

		/* Already on the main thread: tear down inline. */
		lun->main_td = NULL;
		spdk_poller_unregister(&lun->poller);
	}
	pthread_mutex_unlock(&lun->mutex);
}
710 
/* Hand out an I/O channel for the LUN, which is registered as the io_device. */
static struct spdk_io_channel *
bdev_iscsi_get_io_channel(void *ctx)
{
	struct bdev_iscsi_lun *io_device = ctx;
	struct spdk_io_channel *channel = spdk_get_io_channel(io_device);

	return channel;
}
718 
719 static int
720 bdev_iscsi_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
721 {
722 	struct bdev_iscsi_lun *lun = ctx;
723 
724 	spdk_json_write_named_object_begin(w, "iscsi");
725 	spdk_json_write_named_string(w, "initiator_name", lun->initiator_iqn);
726 	spdk_json_write_named_string(w, "url", lun->url);
727 	spdk_json_write_object_end(w);
728 
729 	return 0;
730 }
731 
732 static void
733 bdev_iscsi_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
734 {
735 	struct bdev_iscsi_lun *lun = bdev->ctxt;
736 
737 	pthread_mutex_lock(&lun->mutex);
738 	spdk_json_write_object_begin(w);
739 
740 	spdk_json_write_named_string(w, "method", "bdev_iscsi_create");
741 
742 	spdk_json_write_named_object_begin(w, "params");
743 	spdk_json_write_named_string(w, "name", bdev->name);
744 	spdk_json_write_named_string(w, "initiator_iqn", lun->initiator_iqn);
745 	spdk_json_write_named_string(w, "url", lun->url);
746 	spdk_json_write_object_end(w);
747 
748 	spdk_json_write_object_end(w);
749 	pthread_mutex_unlock(&lun->mutex);
750 }
751 
/* bdev operations implemented by this module; installed on every LUN in create_iscsi_lun(). */
static const struct spdk_bdev_fn_table iscsi_fn_table = {
	.destruct		= bdev_iscsi_destruct,
	.submit_request		= bdev_iscsi_submit_request,
	.io_type_supported	= bdev_iscsi_io_type_supported,
	.get_io_channel		= bdev_iscsi_get_io_channel,
	.dump_info_json		= bdev_iscsi_dump_info_json,
	.write_config_json	= bdev_iscsi_write_config_json,
};
760 
761 static int
762 create_iscsi_lun(struct bdev_iscsi_conn_req *req, uint64_t num_blocks,
763 		 uint32_t block_size, struct spdk_bdev **bdev, uint8_t lbppbe)
764 {
765 	struct bdev_iscsi_lun *lun;
766 	int rc;
767 
768 	lun = calloc(sizeof(*lun), 1);
769 	if (!lun) {
770 		SPDK_ERRLOG("Unable to allocate enough memory for iscsi backend\n");
771 		return -ENOMEM;
772 	}
773 
774 	lun->context = req->context;
775 	lun->lun_id = req->lun;
776 	lun->url = req->url;
777 	lun->initiator_iqn = req->initiator_iqn;
778 
779 	pthread_mutex_init(&lun->mutex, NULL);
780 
781 	lun->bdev.name = req->bdev_name;
782 	lun->bdev.product_name = "iSCSI LUN";
783 	lun->bdev.module = &g_iscsi_bdev_module;
784 	lun->bdev.blocklen = block_size;
785 	lun->bdev.phys_blocklen = block_size * (1 << lbppbe);
786 	lun->bdev.blockcnt = num_blocks;
787 	lun->bdev.ctxt = lun;
788 	lun->unmap_supported = req->unmap_supported;
789 	if (lun->unmap_supported) {
790 		lun->max_unmap = req->max_unmap;
791 		lun->bdev.max_unmap = req->max_unmap;
792 		lun->bdev.max_unmap_segments = BDEV_ISCSI_MAX_UNMAP_BLOCK_DESCS_COUNT;
793 	}
794 
795 	lun->bdev.fn_table = &iscsi_fn_table;
796 
797 	spdk_io_device_register(lun, bdev_iscsi_create_cb, bdev_iscsi_destroy_cb,
798 				sizeof(struct bdev_iscsi_io_channel),
799 				req->bdev_name);
800 	rc = spdk_bdev_register(&lun->bdev);
801 	if (rc) {
802 		spdk_io_device_unregister(lun, NULL);
803 		pthread_mutex_destroy(&lun->mutex);
804 		free(lun);
805 		return rc;
806 	}
807 
808 	lun->no_main_ch_poller_td = spdk_get_thread();
809 	lun->no_main_ch_poller = SPDK_POLLER_REGISTER(bdev_iscsi_no_main_ch_poll, lun,
810 				 BDEV_ISCSI_NO_MAIN_CH_POLL_US);
811 
812 	*bdev = &lun->bdev;
813 	return 0;
814 }
815 
816 static void
817 iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
818 			void *command_data, void *private_data)
819 {
820 	struct bdev_iscsi_conn_req *req = private_data;
821 	struct scsi_readcapacity16 *readcap16;
822 	struct spdk_bdev *bdev = NULL;
823 	struct scsi_task *task = command_data;
824 	struct scsi_task *retry_task = NULL;
825 
826 	if (status != SPDK_SCSI_STATUS_GOOD) {
827 		SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(iscsi));
828 		if (_bdev_iscsi_is_size_change(status, task)) {
829 			scsi_free_scsi_task(task);
830 			retry_task = iscsi_readcapacity16_task(iscsi, req->lun,
831 							       iscsi_readcapacity16_cb, req);
832 			if (retry_task) {
833 				return;
834 			}
835 		}
836 		goto ret;
837 	}
838 
839 	readcap16 = scsi_datain_unmarshall(task);
840 	if (!readcap16) {
841 		status = -ENOMEM;
842 		goto ret;
843 	}
844 
845 	status = create_iscsi_lun(req, readcap16->returned_lba + 1, readcap16->block_length, &bdev,
846 				  readcap16->lbppbe);
847 	if (status) {
848 		SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status);
849 	}
850 
851 ret:
852 	scsi_free_scsi_task(task);
853 	complete_conn_req(req, bdev, status);
854 }
855 
856 static void
857 bdev_iscsi_inquiry_bl_cb(struct iscsi_context *context, int status, void *_task, void *private_data)
858 {
859 	struct scsi_task *task = _task;
860 	struct scsi_inquiry_block_limits *bl_inq = NULL;
861 	struct bdev_iscsi_conn_req *req = private_data;
862 
863 	if (status == SPDK_SCSI_STATUS_GOOD) {
864 		bl_inq = scsi_datain_unmarshall(task);
865 		if (bl_inq != NULL) {
866 			if (!bl_inq->max_unmap) {
867 				SPDK_ERRLOG("Invalid max_unmap, use the default\n");
868 				req->max_unmap = BDEV_ISCSI_DEFAULT_MAX_UNMAP_LBA_COUNT;
869 			} else {
870 				req->max_unmap = bl_inq->max_unmap;
871 			}
872 		}
873 	}
874 
875 	scsi_free_scsi_task(task);
876 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
877 	if (task) {
878 		return;
879 	}
880 
881 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
882 	complete_conn_req(req, NULL, status);
883 }
884 
885 static void
886 bdev_iscsi_inquiry_lbp_cb(struct iscsi_context *context, int status, void *_task,
887 			  void *private_data)
888 {
889 	struct scsi_task *task = _task;
890 	struct scsi_inquiry_logical_block_provisioning *lbp_inq = NULL;
891 	struct bdev_iscsi_conn_req *req = private_data;
892 
893 	if (status == SPDK_SCSI_STATUS_GOOD) {
894 		lbp_inq = scsi_datain_unmarshall(task);
895 		if (lbp_inq != NULL && lbp_inq->lbpu) {
896 			req->unmap_supported = true;
897 			scsi_free_scsi_task(task);
898 
899 			task = iscsi_inquiry_task(context, req->lun, 1,
900 						  SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
901 						  255, bdev_iscsi_inquiry_bl_cb, req);
902 			if (task) {
903 				return;
904 			}
905 		}
906 	} else {
907 		scsi_free_scsi_task(task);
908 	}
909 
910 	task = iscsi_readcapacity16_task(context, req->lun, iscsi_readcapacity16_cb, req);
911 	if (task) {
912 		return;
913 	}
914 
915 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
916 	complete_conn_req(req, NULL, status);
917 }
918 
919 static void
920 iscsi_connect_cb(struct iscsi_context *iscsi, int status,
921 		 void *command_data, void *private_data)
922 {
923 	struct bdev_iscsi_conn_req *req = private_data;
924 	struct scsi_task *task;
925 
926 	if (status != SPDK_SCSI_STATUS_GOOD) {
927 		goto ret;
928 	}
929 
930 	task = iscsi_inquiry_task(iscsi, req->lun, 1,
931 				  SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
932 				  255, bdev_iscsi_inquiry_lbp_cb, req);
933 	if (task) {
934 		return;
935 	}
936 
937 ret:
938 	SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
939 	complete_conn_req(req, NULL, status);
940 }
941 
942 static int
943 iscsi_bdev_conn_poll(void *arg)
944 {
945 	struct bdev_iscsi_conn_req *req, *tmp;
946 	struct pollfd pfd;
947 	struct iscsi_context *context;
948 
949 	if (TAILQ_EMPTY(&g_iscsi_conn_req)) {
950 		spdk_poller_unregister(&g_conn_poller);
951 		return SPDK_POLLER_IDLE;
952 	}
953 
954 	TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
955 		context = req->context;
956 		pfd.fd = iscsi_get_fd(context);
957 		pfd.events = iscsi_which_events(context);
958 		pfd.revents = 0;
959 		if (poll(&pfd, 1, 0) < 0) {
960 			SPDK_ERRLOG("poll failed\n");
961 			return SPDK_POLLER_BUSY;
962 		}
963 
964 		if (pfd.revents != 0) {
965 			if (iscsi_service(context, pfd.revents) < 0) {
966 				SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(context));
967 			}
968 		}
969 
970 		if (req->status == 0) {
971 			/*
972 			 * The request completed successfully.
973 			 */
974 			free(req);
975 		} else if (req->status > 0) {
976 			/*
977 			 * An error has occurred during connecting.  This req has already
978 			 * been removed from the g_iscsi_conn_req list, but we needed to
979 			 * wait until iscsi_service unwound before we could free the req.
980 			 */
981 			_bdev_iscsi_conn_req_free(req);
982 		}
983 	}
984 	return SPDK_POLLER_BUSY;
985 }
986 
987 int
988 create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
989 		  spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg)
990 {
991 	struct bdev_iscsi_conn_req *req;
992 	struct iscsi_url *iscsi_url = NULL;
993 	int rc;
994 
995 	if (!bdev_name || !url || !initiator_iqn || strlen(initiator_iqn) == 0 || !cb_fn) {
996 		return -EINVAL;
997 	}
998 
999 	req = calloc(1, sizeof(struct bdev_iscsi_conn_req));
1000 	if (!req) {
1001 		SPDK_ERRLOG("Cannot allocate pointer of struct bdev_iscsi_conn_req\n");
1002 		return -ENOMEM;
1003 	}
1004 
1005 	req->status = SCSI_STATUS_GOOD;
1006 	req->bdev_name = strdup(bdev_name);
1007 	req->url = strdup(url);
1008 	req->initiator_iqn = strdup(initiator_iqn);
1009 	req->context = iscsi_create_context(initiator_iqn);
1010 	if (!req->bdev_name || !req->url || !req->initiator_iqn || !req->context) {
1011 		SPDK_ERRLOG("Out of memory\n");
1012 		rc = -ENOMEM;
1013 		goto err;
1014 	}
1015 
1016 	req->create_cb = cb_fn;
1017 	req->create_cb_arg = cb_arg;
1018 
1019 	iscsi_url = iscsi_parse_full_url(req->context, url);
1020 	if (iscsi_url == NULL) {
1021 		SPDK_ERRLOG("could not parse URL: %s\n", iscsi_get_error(req->context));
1022 		rc = -EINVAL;
1023 		goto err;
1024 	}
1025 
1026 	req->lun = iscsi_url->lun;
1027 	rc = iscsi_set_session_type(req->context, ISCSI_SESSION_NORMAL);
1028 	rc = rc ? rc : iscsi_set_header_digest(req->context, ISCSI_HEADER_DIGEST_NONE);
1029 	rc = rc ? rc : iscsi_set_targetname(req->context, iscsi_url->target);
1030 	rc = rc ? rc : iscsi_full_connect_async(req->context, iscsi_url->portal, iscsi_url->lun,
1031 						iscsi_connect_cb, req);
1032 	if (rc == 0 && iscsi_url->user[0] != '\0') {
1033 		rc = iscsi_set_initiator_username_pwd(req->context, iscsi_url->user, iscsi_url->passwd);
1034 	}
1035 
1036 	if (rc < 0) {
1037 		SPDK_ERRLOG("Failed to connect provided URL=%s: %s\n", url, iscsi_get_error(req->context));
1038 		goto err;
1039 	}
1040 
1041 	iscsi_destroy_url(iscsi_url);
1042 	req->status = -1;
1043 	TAILQ_INSERT_TAIL(&g_iscsi_conn_req, req, link);
1044 	if (!g_conn_poller) {
1045 		g_conn_poller = SPDK_POLLER_REGISTER(iscsi_bdev_conn_poll, NULL, BDEV_ISCSI_CONNECTION_POLL_US);
1046 	}
1047 
1048 	return 0;
1049 
1050 err:
1051 	/* iscsi_destroy_url() is not NULL-proof */
1052 	if (iscsi_url) {
1053 		iscsi_destroy_url(iscsi_url);
1054 	}
1055 
1056 	if (req->context) {
1057 		iscsi_destroy_context(req->context);
1058 	}
1059 
1060 	free(req->initiator_iqn);
1061 	free(req->bdev_name);
1062 	free(req->url);
1063 	free(req);
1064 	return rc;
1065 }
1066 
1067 void
1068 delete_iscsi_disk(const char *bdev_name, spdk_delete_iscsi_complete cb_fn, void *cb_arg)
1069 {
1070 	int rc;
1071 
1072 	rc = spdk_bdev_unregister_by_name(bdev_name, &g_iscsi_bdev_module, cb_fn, cb_arg);
1073 	if (rc != 0) {
1074 		cb_fn(cb_arg, rc);
1075 	}
1076 }
1077 
/* Module init hook.  Nothing needs to be set up in advance, so it always reports success. */
static int
bdev_iscsi_initialize(void)
{
	const int init_rc = 0;

	return init_rc;
}
1083 
/* Log component named by the SPDK_DEBUGLOG() calls in this file. */
SPDK_LOG_REGISTER_COMPONENT(iscsi_init)
1085