/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk_cunit.h"

#include "common/lib/ut_multithread.c"
#include "unit/lib/json_mock.c"

#include "spdk/config.h"
/* HACK: disable VTune integration so the unit test doesn't need VTune headers and libs to build */
#undef SPDK_CONFIG_VTUNE

#include "bdev/bdev.c"

DEFINE_STUB(spdk_notify_send, uint64_t, (const char *type, const char *ctx), 0);
DEFINE_STUB(spdk_notify_type_register, struct spdk_notify_type *, (const char *type), NULL);
DEFINE_STUB(spdk_memory_domain_get_dma_device_id, const char *, (struct spdk_memory_domain *domain),
	    "test_domain");
DEFINE_STUB(spdk_memory_domain_get_dma_device_type, enum spdk_dma_device_type,
	    (struct spdk_memory_domain *domain), 0);

static bool g_memory_domain_pull_data_called;
static bool g_memory_domain_push_data_called;
static int g_accel_io_device;

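/*
 * Memory domain data-movement mocks: each records that it was called and, unless a
 * return value has been forced through the DEFINE_RETURN_MOCK/HANDLE_RETURN_MOCK
 * machinery, invokes the completion callback synchronously and reports success.
 */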
DEFINE_RETURN_MOCK(spdk_memory_domain_pull_data, int);
int
spdk_memory_domain_pull_data(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
			     struct iovec *src_iov, uint32_t src_iov_cnt, struct iovec *dst_iov, uint32_t dst_iov_cnt,
			     spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	g_memory_domain_pull_data_called = true;
	HANDLE_RETURN_MOCK(spdk_memory_domain_pull_data);
	cpl_cb(cpl_cb_arg, 0);
	return 0;
}

DEFINE_RETURN_MOCK(spdk_memory_domain_push_data, int);
int
spdk_memory_domain_push_data(struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
			     struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt,
			     spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	g_memory_domain_push_data_called = true;
	HANDLE_RETURN_MOCK(spdk_memory_domain_push_data);
	cpl_cb(cpl_cb_arg, 0);
	return 0;
}

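/*
 * The bdev layer acquires an accel channel for each bdev channel; route those
 * requests to the dummy io_device registered in ut_bdev_setup() below.
 */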
struct spdk_io_channel *
spdk_accel_get_io_channel(void)
{
	return spdk_get_io_channel(&g_accel_io_device);
}

int g_status;
int g_count;
enum spdk_bdev_event_type g_event_type1;
enum spdk_bdev_event_type g_event_type2;
enum spdk_bdev_event_type g_event_type3;
enum spdk_bdev_event_type g_event_type4;
struct spdk_histogram_data *g_histogram;
void *g_unregister_arg;
int g_unregister_rc;

void
spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io,
			 int *sc, int *sk, int *asc, int *ascq)
{
}

static int
ut_accel_ch_create_cb(void *io_device, void *ctx)
{
	return 0;
}

static void
ut_accel_ch_destroy_cb(void *io_device, void *ctx)
{
}

static int
ut_bdev_setup(void)
{
	spdk_io_device_register(&g_accel_io_device, ut_accel_ch_create_cb,
				ut_accel_ch_destroy_cb, 0, NULL);
	return 0;
}

static int
ut_bdev_teardown(void)
{
	spdk_io_device_unregister(&g_accel_io_device, NULL);

	return 0;
}

static int
stub_destruct(void *ctx)
{
	return 0;
}

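/*
 * Expected-I/O bookkeeping: a test allocates ut_expected_io entries describing the
 * child I/O it expects the bdev layer to generate and queues them on the channel's
 * expected_io list; stub_submit_request() pops and validates them in order.
 */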
struct ut_expected_io {
	uint8_t				type;
	uint64_t			offset;
	uint64_t			src_offset;
	uint64_t			length;
	int				iovcnt;
	struct iovec			iov[SPDK_BDEV_IO_NUM_CHILD_IOV];
	void				*md_buf;
	TAILQ_ENTRY(ut_expected_io)	link;
};

struct bdev_ut_channel {
	TAILQ_HEAD(, spdk_bdev_io)	outstanding_io;
	uint32_t			outstanding_io_count;
	TAILQ_HEAD(, ut_expected_io)	expected_io;
};

static bool g_io_done;
static struct spdk_bdev_io *g_bdev_io;
static enum spdk_bdev_io_status g_io_status;
static enum spdk_bdev_io_status g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
static uint32_t g_bdev_ut_io_device;
static struct bdev_ut_channel *g_bdev_ut_channel;
static void *g_compare_read_buf;
static uint32_t g_compare_read_buf_len;
static void *g_compare_write_buf;
static uint32_t g_compare_write_buf_len;
static void *g_compare_md_buf;
static bool g_abort_done;
static enum spdk_bdev_io_status g_abort_status;
static void *g_zcopy_read_buf;
static uint32_t g_zcopy_read_buf_len;
static void *g_zcopy_write_buf;
static uint32_t g_zcopy_write_buf_len;
static struct spdk_bdev_io *g_zcopy_bdev_io;
static uint64_t g_seek_data_offset;
static uint64_t g_seek_hole_offset;
static uint64_t g_seek_offset;

static struct ut_expected_io *
ut_alloc_expected_io(uint8_t type, uint64_t offset, uint64_t length, int iovcnt)
{
	struct ut_expected_io *expected_io;

	expected_io = calloc(1, sizeof(*expected_io));
	SPDK_CU_ASSERT_FATAL(expected_io != NULL);

	expected_io->type = type;
	expected_io->offset = offset;
	expected_io->length = length;
	expected_io->iovcnt = iovcnt;

	return expected_io;
}

static struct ut_expected_io *
ut_alloc_expected_copy_io(uint8_t type, uint64_t offset, uint64_t src_offset, uint64_t length)
{
	struct ut_expected_io *expected_io;

	expected_io = calloc(1, sizeof(*expected_io));
	SPDK_CU_ASSERT_FATAL(expected_io != NULL);

	expected_io->type = type;
	expected_io->offset = offset;
	expected_io->src_offset = src_offset;
	expected_io->length = length;

	return expected_io;
}

static void
ut_expected_io_set_iov(struct ut_expected_io *expected_io, int pos, void *base, size_t len)
{
	expected_io->iov[pos].iov_base = base;
	expected_io->iov[pos].iov_len = len;
}

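/*
 * A minimal sketch of the pattern used throughout this file (buf, desc and io_ch
 * are hypothetical here):
 *
 *   expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 8, 1);
 *   ut_expected_io_set_iov(expected_io, 0, buf, 8 * 512);
 *   TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
 *   rc = spdk_bdev_read_blocks(desc, io_ch, buf, 0, 8, io_done, NULL);
 */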
static void
stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_ut_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct ut_expected_io *expected_io;
	struct iovec *iov, *expected_iov;
	struct spdk_bdev_io *bio_to_abort;
	int i;

	g_bdev_io = bdev_io;

	if (g_compare_read_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_read_buf_len == len);
		memcpy(bdev_io->u.bdev.iovs[0].iov_base, g_compare_read_buf, len);
		if (bdev_io->bdev->md_len && bdev_io->u.bdev.md_buf && g_compare_md_buf) {
			memcpy(bdev_io->u.bdev.md_buf, g_compare_md_buf,
			       bdev_io->bdev->md_len * bdev_io->u.bdev.num_blocks);
		}
	}

	if (g_compare_write_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_write_buf_len == len);
		memcpy(g_compare_write_buf, bdev_io->u.bdev.iovs[0].iov_base, len);
	}

	if (g_compare_read_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_COMPARE) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_read_buf_len == len);
		if (memcmp(bdev_io->u.bdev.iovs[0].iov_base, g_compare_read_buf, len)) {
			g_io_exp_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
		}
		if (bdev_io->u.bdev.md_buf &&
		    memcmp(bdev_io->u.bdev.md_buf, g_compare_md_buf,
			   bdev_io->bdev->md_len * bdev_io->u.bdev.num_blocks)) {
			g_io_exp_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) {
		if (g_io_exp_status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			TAILQ_FOREACH(bio_to_abort, &ch->outstanding_io, module_link) {
				if (bio_to_abort == bdev_io->u.abort.bio_to_abort) {
					TAILQ_REMOVE(&ch->outstanding_io, bio_to_abort, module_link);
					ch->outstanding_io_count--;
					spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_FAILED);
					break;
				}
			}
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) {
		if (bdev_io->u.bdev.zcopy.start) {
			g_zcopy_bdev_io = bdev_io;
			if (bdev_io->u.bdev.zcopy.populate) {
				/* Start of a read */
				CU_ASSERT(g_zcopy_read_buf != NULL);
				CU_ASSERT(g_zcopy_read_buf_len > 0);
				bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_read_buf_len;
				bdev_io->u.bdev.iovcnt = 1;
			} else {
				/* Start of a write */
				CU_ASSERT(g_zcopy_write_buf != NULL);
				CU_ASSERT(g_zcopy_write_buf_len > 0);
				bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_write_buf;
				bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_write_buf_len;
				bdev_io->u.bdev.iovcnt = 1;
			}
		} else {
			if (bdev_io->u.bdev.zcopy.commit) {
				/* End of write */
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_write_buf);
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_write_buf_len);
				CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
				g_zcopy_write_buf = NULL;
				g_zcopy_write_buf_len = 0;
			} else {
				/* End of read */
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_read_buf);
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_read_buf_len);
				CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
				g_zcopy_read_buf = NULL;
				g_zcopy_read_buf_len = 0;
			}
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_SEEK_DATA) {
		bdev_io->u.bdev.seek.offset = g_seek_data_offset;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_SEEK_HOLE) {
		bdev_io->u.bdev.seek.offset = g_seek_hole_offset;
	}

	TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link);
	ch->outstanding_io_count++;

	expected_io = TAILQ_FIRST(&ch->expected_io);
	if (expected_io == NULL) {
		return;
	}
	TAILQ_REMOVE(&ch->expected_io, expected_io, link);

	if (expected_io->type != SPDK_BDEV_IO_TYPE_INVALID) {
		CU_ASSERT(bdev_io->type == expected_io->type);
	}

	if (expected_io->md_buf != NULL) {
		CU_ASSERT(expected_io->md_buf == bdev_io->u.bdev.md_buf);
	}

	if (expected_io->length == 0) {
		free(expected_io);
		return;
	}

	CU_ASSERT(expected_io->offset == bdev_io->u.bdev.offset_blocks);
	CU_ASSERT(expected_io->length == bdev_io->u.bdev.num_blocks);
	if (expected_io->type == SPDK_BDEV_IO_TYPE_COPY) {
		CU_ASSERT(expected_io->src_offset == bdev_io->u.bdev.copy.src_offset_blocks);
	}

	if (expected_io->iovcnt == 0) {
		free(expected_io);
		/* UNMAP, WRITE_ZEROES, FLUSH and COPY don't have iovs, so we can just return now. */
		return;
	}

	CU_ASSERT(expected_io->iovcnt == bdev_io->u.bdev.iovcnt);
	for (i = 0; i < expected_io->iovcnt; i++) {
		expected_iov = &expected_io->iov[i];
		if (bdev_io->internal.orig_iovcnt == 0) {
			iov = &bdev_io->u.bdev.iovs[i];
		} else {
			iov = &bdev_io->internal.orig_iovs[i];
		}
		CU_ASSERT(iov->iov_len == expected_iov->iov_len);
		CU_ASSERT(iov->iov_base == expected_iov->iov_base);
	}

	free(expected_io);
}

static void
stub_submit_request_get_buf_cb(struct spdk_io_channel *_ch,
			       struct spdk_bdev_io *bdev_io, bool success)
{
	CU_ASSERT(success == true);

	stub_submit_request(_ch, bdev_io);
}

static void
stub_submit_request_get_buf(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	spdk_bdev_io_get_buf(bdev_io, stub_submit_request_get_buf_cb,
			     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}

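/*
 * Complete up to num_to_complete I/Os from the head of the outstanding queue using
 * the status in g_io_exp_status; returns the number actually completed.
 */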
static uint32_t
stub_complete_io(uint32_t num_to_complete)
{
	struct bdev_ut_channel *ch = g_bdev_ut_channel;
	struct spdk_bdev_io *bdev_io;
	static enum spdk_bdev_io_status io_status;
	uint32_t num_completed = 0;

	while (num_completed < num_to_complete) {
		if (TAILQ_EMPTY(&ch->outstanding_io)) {
			break;
		}
		bdev_io = TAILQ_FIRST(&ch->outstanding_io);
		TAILQ_REMOVE(&ch->outstanding_io, bdev_io, module_link);
		ch->outstanding_io_count--;
		io_status = g_io_exp_status == SPDK_BDEV_IO_STATUS_SUCCESS ? SPDK_BDEV_IO_STATUS_SUCCESS :
			    g_io_exp_status;
		spdk_bdev_io_complete(bdev_io, io_status);
		num_completed++;
	}

	return num_completed;
}

static struct spdk_io_channel *
bdev_ut_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_bdev_ut_io_device);
}

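/*
 * Per-type support table consulted by stub_io_type_supported(); tests flip entries
 * with ut_enable_io_type() to simulate backends that lack specific operations.
 */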
static bool g_io_types_supported[SPDK_BDEV_NUM_IO_TYPES] = {
	[SPDK_BDEV_IO_TYPE_READ]		= true,
	[SPDK_BDEV_IO_TYPE_WRITE]		= true,
	[SPDK_BDEV_IO_TYPE_COMPARE]		= true,
	[SPDK_BDEV_IO_TYPE_UNMAP]		= true,
	[SPDK_BDEV_IO_TYPE_FLUSH]		= true,
	[SPDK_BDEV_IO_TYPE_RESET]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_ADMIN]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_IO]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_IO_MD]		= true,
	[SPDK_BDEV_IO_TYPE_WRITE_ZEROES]	= true,
	[SPDK_BDEV_IO_TYPE_ZCOPY]		= true,
	[SPDK_BDEV_IO_TYPE_ABORT]		= true,
	[SPDK_BDEV_IO_TYPE_SEEK_HOLE]		= true,
	[SPDK_BDEV_IO_TYPE_SEEK_DATA]		= true,
	[SPDK_BDEV_IO_TYPE_COPY]		= true,
};

static void
ut_enable_io_type(enum spdk_bdev_io_type io_type, bool enable)
{
	g_io_types_supported[io_type] = enable;
}

static bool
stub_io_type_supported(void *_bdev, enum spdk_bdev_io_type io_type)
{
	return g_io_types_supported[io_type];
}

static struct spdk_bdev_fn_table fn_table = {
	.destruct = stub_destruct,
	.submit_request = stub_submit_request,
	.get_io_channel = bdev_ut_get_io_channel,
	.io_type_supported = stub_io_type_supported,
};

static int
bdev_ut_create_ch(void *io_device, void *ctx_buf)
{
	struct bdev_ut_channel *ch = ctx_buf;

	CU_ASSERT(g_bdev_ut_channel == NULL);
	g_bdev_ut_channel = ch;

	TAILQ_INIT(&ch->outstanding_io);
	ch->outstanding_io_count = 0;
	TAILQ_INIT(&ch->expected_io);
	return 0;
}

static void
bdev_ut_destroy_ch(void *io_device, void *ctx_buf)
{
	CU_ASSERT(g_bdev_ut_channel != NULL);
	g_bdev_ut_channel = NULL;
}

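/* Forward declaration: bdev_ut_module_init() signals completion on bdev_ut_if,
 * which is defined below with async_init set.
 */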
struct spdk_bdev_module bdev_ut_if;

static int
bdev_ut_module_init(void)
{
	spdk_io_device_register(&g_bdev_ut_io_device, bdev_ut_create_ch, bdev_ut_destroy_ch,
				sizeof(struct bdev_ut_channel), NULL);
	spdk_bdev_module_init_done(&bdev_ut_if);
	return 0;
}

static void
bdev_ut_module_fini(void)
{
	spdk_io_device_unregister(&g_bdev_ut_io_device, NULL);
}

struct spdk_bdev_module bdev_ut_if = {
	.name = "bdev_ut",
	.module_init = bdev_ut_module_init,
	.module_fini = bdev_ut_module_fini,
	.async_init = true,
};

static void vbdev_ut_examine_config(struct spdk_bdev *bdev);
static void vbdev_ut_examine_disk(struct spdk_bdev *bdev);

static int
vbdev_ut_module_init(void)
{
	return 0;
}

static void
vbdev_ut_module_fini(void)
{
}

struct spdk_bdev_module vbdev_ut_if = {
	.name = "vbdev_ut",
	.module_init = vbdev_ut_module_init,
	.module_fini = vbdev_ut_module_fini,
	.examine_config = vbdev_ut_examine_config,
	.examine_disk = vbdev_ut_examine_disk,
};

SPDK_BDEV_MODULE_REGISTER(bdev_ut, &bdev_ut_if)
SPDK_BDEV_MODULE_REGISTER(vbdev_ut, &vbdev_ut_if)

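/*
 * Optional per-bdev context (passed via allocate_bdev_ctx()) that counts examine
 * callbacks and lets a test hook vbdev_ut's examine_config/examine_disk paths.
 */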
struct ut_examine_ctx {
	void (*examine_config)(struct spdk_bdev *bdev);
	void (*examine_disk)(struct spdk_bdev *bdev);
	uint32_t examine_config_count;
	uint32_t examine_disk_count;
};

static void
vbdev_ut_examine_config(struct spdk_bdev *bdev)
{
	struct ut_examine_ctx *ctx = bdev->ctxt;

	if (ctx != NULL) {
		ctx->examine_config_count++;
		if (ctx->examine_config != NULL) {
			ctx->examine_config(bdev);
		}
	}

	spdk_bdev_module_examine_done(&vbdev_ut_if);
}

static void
vbdev_ut_examine_disk(struct spdk_bdev *bdev)
{
	struct ut_examine_ctx *ctx = bdev->ctxt;

	if (ctx != NULL) {
		ctx->examine_disk_count++;
		if (ctx->examine_disk != NULL) {
			ctx->examine_disk(bdev);
		}
	}

	spdk_bdev_module_examine_done(&vbdev_ut_if);
}

static struct spdk_bdev *
allocate_bdev_ctx(char *name, void *ctx)
{
	struct spdk_bdev *bdev;
	int rc;

	bdev = calloc(1, sizeof(*bdev));
	SPDK_CU_ASSERT_FATAL(bdev != NULL);

	bdev->ctxt = ctx;
	bdev->name = name;
	bdev->fn_table = &fn_table;
	bdev->module = &bdev_ut_if;
	bdev->blockcnt = 1024;
	bdev->blocklen = 512;

	spdk_uuid_generate(&bdev->uuid);

	rc = spdk_bdev_register(bdev);
	poll_threads();
	CU_ASSERT(rc == 0);

	return bdev;
}

static struct spdk_bdev *
allocate_bdev(char *name)
{
	return allocate_bdev_ctx(name, NULL);
}

static struct spdk_bdev *
allocate_vbdev(char *name)
{
	struct spdk_bdev *bdev;
	int rc;

	bdev = calloc(1, sizeof(*bdev));
	SPDK_CU_ASSERT_FATAL(bdev != NULL);

	bdev->name = name;
	bdev->fn_table = &fn_table;
	bdev->module = &vbdev_ut_if;

	rc = spdk_bdev_register(bdev);
	poll_threads();
	CU_ASSERT(rc == 0);

	return bdev;
}

static void
free_bdev(struct spdk_bdev *bdev)
{
	spdk_bdev_unregister(bdev, NULL, NULL);
	poll_threads();
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
free_vbdev(struct spdk_bdev *bdev)
{
	spdk_bdev_unregister(bdev, NULL, NULL);
	poll_threads();
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
get_device_stat_cb(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat, void *cb_arg, int rc)
{
	const char *bdev_name;

	CU_ASSERT(bdev != NULL);
	CU_ASSERT(rc == 0);
	bdev_name = spdk_bdev_get_name(bdev);
	CU_ASSERT_STRING_EQUAL(bdev_name, "bdev0");

	free(stat);

	*(bool *)cb_arg = true;
}

static void
bdev_unregister_cb(void *cb_arg, int rc)
{
	g_unregister_arg = cb_arg;
	g_unregister_rc = rc;
}

static void
bdev_ut_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
}

static void
bdev_open_cb1(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;

	g_event_type1 = type;
	if (SPDK_BDEV_EVENT_REMOVE == type) {
		spdk_bdev_close(desc);
	}
}

static void
bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;

	g_event_type2 = type;
	if (SPDK_BDEV_EVENT_REMOVE == type) {
		spdk_bdev_close(desc);
	}
}

static void
bdev_open_cb3(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	g_event_type3 = type;
}

static void
bdev_open_cb4(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	g_event_type4 = type;
}

static void
bdev_seek_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	g_seek_offset = spdk_bdev_io_get_seek_offset(bdev_io);
	spdk_bdev_free_io(bdev_io);
}

static void
get_device_stat_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_io_stat *stat;
	bool done;

	bdev = allocate_bdev("bdev0");
	stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
	if (stat == NULL) {
		free_bdev(bdev);
		return;
	}

	done = false;
	spdk_bdev_get_device_stat(bdev, stat, get_device_stat_cb, &done);
	while (!done) { poll_threads(); }

	free_bdev(bdev);
}

static void
open_write_test(void)
{
	struct spdk_bdev *bdev[9];
	struct spdk_bdev_desc *desc[9] = {};
	int rc;

	/*
	 * Create a tree of bdevs to test various open w/ write cases.
	 *
	 * bdev0 through bdev3 are physical block devices, such as NVMe
	 * namespaces or Ceph block devices.
	 *
	 * bdev4 is a virtual bdev with multiple base bdevs.  This models
	 * caching or RAID use cases.
	 *
	 * bdev5 and bdev6 are virtual bdevs with the same base bdev (bdev2).
	 * This models partitioning or logical volume use cases.
	 *
	 * bdev7 is a virtual bdev with multiple base bdevs. One of base bdevs
	 * (bdev2) is shared with other virtual bdevs: bdev5 and bdev6. This
	 * models caching, RAID, partitioning or logical volumes use cases.
	 *
	 * bdev8 is a virtual bdev with multiple base bdevs, but these
	 * base bdevs are themselves virtual bdevs.
	 *
	 *                bdev8
	 *                  |
	 *            +----------+
	 *            |          |
	 *          bdev4      bdev5   bdev6   bdev7
	 *            |          |       |       |
	 *        +---+---+      +---+   +   +---+---+
	 *        |       |           \  |  /         \
	 *      bdev0   bdev1          bdev2         bdev3
	 */

	bdev[0] = allocate_bdev("bdev0");
	rc = spdk_bdev_module_claim_bdev(bdev[0], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[1] = allocate_bdev("bdev1");
	rc = spdk_bdev_module_claim_bdev(bdev[1], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[2] = allocate_bdev("bdev2");
	rc = spdk_bdev_module_claim_bdev(bdev[2], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[3] = allocate_bdev("bdev3");
	rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[4] = allocate_vbdev("bdev4");
	rc = spdk_bdev_module_claim_bdev(bdev[4], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[5] = allocate_vbdev("bdev5");
	rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[6] = allocate_vbdev("bdev6");

	bdev[7] = allocate_vbdev("bdev7");

	bdev[8] = allocate_vbdev("bdev8");

	/* Open bdev0 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc[0]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[0] != NULL);
	CU_ASSERT(bdev[0] == spdk_bdev_desc_get_bdev(desc[0]));
	spdk_bdev_close(desc[0]);

	/*
	 * Open bdev1 read/write.  This should fail since bdev1 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev1", true, bdev_ut_event_cb, NULL, &desc[1]);
	CU_ASSERT(rc == -EPERM);

	/*
	 * Open bdev4 read/write.  This should fail since bdev4 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev4", true, bdev_ut_event_cb, NULL, &desc[4]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev4 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev4", false, bdev_ut_event_cb, NULL, &desc[4]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[4] != NULL);
	CU_ASSERT(bdev[4] == spdk_bdev_desc_get_bdev(desc[4]));
	spdk_bdev_close(desc[4]);

	/*
	 * Open bdev8 read/write.  This should succeed since no module has
	 * claimed bdev8.
	 */
	rc = spdk_bdev_open_ext("bdev8", true, bdev_ut_event_cb, NULL, &desc[8]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[8] != NULL);
	CU_ASSERT(bdev[8] == spdk_bdev_desc_get_bdev(desc[8]));
	spdk_bdev_close(desc[8]);

	/*
	 * Open bdev5 read/write.  This should fail since bdev5 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev5", true, bdev_ut_event_cb, NULL, &desc[5]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev5 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev5", false, bdev_ut_event_cb, NULL, &desc[5]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[5] != NULL);
	CU_ASSERT(bdev[5] == spdk_bdev_desc_get_bdev(desc[5]));
	spdk_bdev_close(desc[5]);

	free_vbdev(bdev[8]);

	free_vbdev(bdev[5]);
	free_vbdev(bdev[6]);
	free_vbdev(bdev[7]);

	free_vbdev(bdev[4]);

	free_bdev(bdev[0]);
	free_bdev(bdev[1]);
	free_bdev(bdev[2]);
	free_bdev(bdev[3]);
}

static void
claim_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc, *open_desc;
	int rc;
	uint32_t count;

	/*
	 * A vbdev that uses a read-only bdev may need it to remain read-only.
	 * To do so, it opens the bdev read-only, then claims it without
	 * passing a spdk_bdev_desc.
	 */
	bdev = allocate_bdev("bdev0");
	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(desc->write == false);

	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);
	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
	CU_ASSERT(bdev->internal.claim.v1.module == &bdev_ut_if);

	/* There should be only one open descriptor and it should still be ro */
	count = 0;
	TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
		CU_ASSERT(open_desc == desc);
		CU_ASSERT(!open_desc->write);
		count++;
	}
	CU_ASSERT(count == 1);

	/* A read-only descriptor is upgraded to read-write if it is passed with the claim. */
	spdk_bdev_module_release_bdev(bdev);
	rc = spdk_bdev_module_claim_bdev(bdev, desc, &bdev_ut_if);
	CU_ASSERT(rc == 0);
	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
	CU_ASSERT(bdev->internal.claim.v1.module == &bdev_ut_if);

	/* There should be only one open descriptor and it should be rw */
	count = 0;
	TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
		CU_ASSERT(open_desc == desc);
		CU_ASSERT(open_desc->write);
		count++;
	}
	CU_ASSERT(count == 1);

	spdk_bdev_close(desc);
	free_bdev(bdev);
}

static void
bytes_to_blocks_test(void)
{
	struct spdk_bdev bdev;
	uint64_t offset_blocks, num_blocks;

	memset(&bdev, 0, sizeof(bdev));

	bdev.blocklen = 512;

	/* All parameters valid */
	offset_blocks = 0;
	num_blocks = 0;
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 512, &offset_blocks, 1024, &num_blocks) == 0);
	CU_ASSERT(offset_blocks == 1);
	CU_ASSERT(num_blocks == 2);

	/* Offset not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 3, &offset_blocks, 512, &num_blocks) != 0);

	/* Length not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 512, &offset_blocks, 3, &num_blocks) != 0);

	/* In case blocklen is not a power of two */
	bdev.blocklen = 100;
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 100, &offset_blocks, 200, &num_blocks) == 0);
	CU_ASSERT(offset_blocks == 1);
	CU_ASSERT(num_blocks == 2);

	/* Offset not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 3, &offset_blocks, 100, &num_blocks) != 0);

	/* Length not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 100, &offset_blocks, 3, &num_blocks) != 0);
}

static void
num_blocks_test(void)
{
	struct spdk_bdev bdev;
	struct spdk_bdev_desc *desc = NULL;
	int rc;

	memset(&bdev, 0, sizeof(bdev));
	bdev.name = "num_blocks";
	bdev.fn_table = &fn_table;
	bdev.module = &bdev_ut_if;
	spdk_bdev_register(&bdev);
	poll_threads();
	spdk_bdev_notify_blockcnt_change(&bdev, 50);

	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 70) == 0);
	/* Shrinking block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 30) == 0);

	rc = spdk_bdev_open_ext("num_blocks", false, bdev_open_cb1, &desc, &desc);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(&bdev == spdk_bdev_desc_get_bdev(desc));

	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 80) == 0);
	/* Shrinking block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 20) != 0);

	g_event_type1 = 0xFF;
	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 90) == 0);

	poll_threads();
	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_RESIZE);

	g_event_type1 = 0xFF;
	/* Growing block number and closing */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 100) == 0);

	spdk_bdev_close(desc);
	spdk_bdev_unregister(&bdev, NULL, NULL);

	poll_threads();

	/* Callback is not called for closed device */
	CU_ASSERT_EQUAL(g_event_type1, 0xFF);
}

static void
io_valid_test(void)
{
	struct spdk_bdev bdev;

	memset(&bdev, 0, sizeof(bdev));

	bdev.blocklen = 512;
	spdk_spin_init(&bdev.internal.spinlock);

	spdk_bdev_notify_blockcnt_change(&bdev, 100);

	/* All parameters valid */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 1, 2) == true);

	/* Last valid block */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 99, 1) == true);

	/* Offset past end of bdev */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 100, 1) == false);

	/* Offset + length past end of bdev */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 99, 2) == false);

	/* Offset near end of uint64_t range (2^64 - 1) */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 18446744073709551615ULL, 1) == false);

	spdk_spin_destroy(&bdev.internal.spinlock);
}

static void
alias_add_del_test(void)
{
	struct spdk_bdev *bdev[3];
	int rc;

	/* Creating and registering bdevs */
	bdev[0] = allocate_bdev("bdev0");
	SPDK_CU_ASSERT_FATAL(bdev[0] != 0);

	bdev[1] = allocate_bdev("bdev1");
	SPDK_CU_ASSERT_FATAL(bdev[1] != 0);

	bdev[2] = allocate_bdev("bdev2");
	SPDK_CU_ASSERT_FATAL(bdev[2] != 0);

	poll_threads();

	/*
	 * Try adding an alias identical to the name.
	 * Since the alias matches the bdev name, it cannot be added to the aliases list.
	 */
	rc = spdk_bdev_alias_add(bdev[0], bdev[0]->name);
	CU_ASSERT(rc == -EEXIST);

	/*
	 * Try to add an empty alias;
	 * this one should fail.
	 */
	rc = spdk_bdev_alias_add(bdev[0], NULL);
	CU_ASSERT(rc == -EINVAL);

	/* Try adding the same alias to two different registered bdevs */

	/* The alias is used for the first time, so this one should pass */
	rc = spdk_bdev_alias_add(bdev[0], "proper alias 0");
	CU_ASSERT(rc == 0);

	/* The alias was already added to another bdev, so this one should fail */
	rc = spdk_bdev_alias_add(bdev[1], "proper alias 0");
	CU_ASSERT(rc == -EEXIST);

	/* The alias is used for the first time, so this one should pass */
	rc = spdk_bdev_alias_add(bdev[1], "proper alias 1");
	CU_ASSERT(rc == 0);

	/* Try removing an alias from registered bdevs */

	/* The alias is not on the bdev's alias list, so this one should fail */
	rc = spdk_bdev_alias_del(bdev[0], "not existing");
	CU_ASSERT(rc == -ENOENT);

	/* The alias is present on the bdev's alias list, so this one should pass */
	rc = spdk_bdev_alias_del(bdev[0], "proper alias 0");
	CU_ASSERT(rc == 0);

	/* The alias is present on the bdev's alias list, so this one should pass */
	rc = spdk_bdev_alias_del(bdev[1], "proper alias 1");
	CU_ASSERT(rc == 0);

	/* Try removing the name instead of an alias; this should fail since the name cannot be changed or removed */
	rc = spdk_bdev_alias_del(bdev[0], bdev[0]->name);
	CU_ASSERT(rc != 0);

	/* Try deleting all aliases from an empty alias list */
	spdk_bdev_alias_del_all(bdev[2]);
	SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&bdev[2]->aliases));

	/* Try deleting all aliases from a non-empty alias list */
	rc = spdk_bdev_alias_add(bdev[2], "alias0");
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_alias_add(bdev[2], "alias1");
	CU_ASSERT(rc == 0);
	spdk_bdev_alias_del_all(bdev[2]);
	CU_ASSERT(TAILQ_EMPTY(&bdev[2]->aliases));

	/* Unregister and free bdevs */
	spdk_bdev_unregister(bdev[0], NULL, NULL);
	spdk_bdev_unregister(bdev[1], NULL, NULL);
	spdk_bdev_unregister(bdev[2], NULL, NULL);

	poll_threads();

	free(bdev[0]);
	free(bdev[1]);
	free(bdev[2]);
}

static void
io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	g_io_done = true;
	g_io_status = bdev_io->internal.status;
	if ((bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) &&
	    (bdev_io->u.bdev.zcopy.start)) {
		g_zcopy_bdev_io = bdev_io;
	} else {
		spdk_bdev_free_io(bdev_io);
		g_zcopy_bdev_io = NULL;
	}
}

static void
bdev_init_cb(void *arg, int rc)
{
	CU_ASSERT(rc == 0);
}

static void
bdev_fini_cb(void *arg)
{
}

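/* Bring the bdev layer up and down around a test, optionally overriding the global
 * bdev options first.
 */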
static void
ut_init_bdev(struct spdk_bdev_opts *opts)
{
	int rc;

	if (opts != NULL) {
		rc = spdk_bdev_set_opts(opts);
		CU_ASSERT(rc == 0);
	}
	rc = spdk_iobuf_initialize();
	CU_ASSERT(rc == 0);
	spdk_bdev_initialize(bdev_init_cb, NULL);
	poll_threads();
}

static void
ut_fini_bdev(void)
{
	spdk_bdev_finish(bdev_fini_cb, NULL);
	spdk_iobuf_finish(bdev_fini_cb, NULL);
	poll_threads();
}

struct bdev_ut_io_wait_entry {
	struct spdk_bdev_io_wait_entry	entry;
	struct spdk_io_channel		*io_ch;
	struct spdk_bdev_desc		*desc;
	bool				submitted;
};

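/*
 * Retry callback for spdk_bdev_queue_io_wait(): once a bdev_io returns to the
 * exhausted pool, resubmit the read that previously failed with -ENOMEM.
 */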
static void
io_wait_cb(void *arg)
{
	struct bdev_ut_io_wait_entry *entry = arg;
	int rc;

	rc = spdk_bdev_read_blocks(entry->desc, entry->io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	entry->submitted = true;
}

static void
bdev_io_types_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 4;
	bdev_opts.bdev_io_cache_size = 2;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	poll_threads();
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	/* WRITE and WRITE ZEROES are not supported */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE, false);
	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 0, 128, io_done, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE, true);

	/* NVME_IO, NVME_IO_MD and NVME_ADMIN are not supported */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO_MD, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_ADMIN, false);
	rc = spdk_bdev_nvme_io_passthru(desc, io_ch, NULL, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	rc = spdk_bdev_nvme_io_passthru_md(desc, io_ch, NULL, NULL, 0, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	rc = spdk_bdev_nvme_admin_passthru(desc, io_ch, NULL, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO_MD, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_ADMIN, true);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	ut_fini_bdev();
}

static void
bdev_io_wait_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	struct bdev_ut_io_wait_entry io_wait_entry;
	struct bdev_ut_io_wait_entry io_wait_entry2;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 4;
	bdev_opts.bdev_io_cache_size = 2;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	poll_threads();
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);

	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == -ENOMEM);

	io_wait_entry.entry.bdev = bdev;
	io_wait_entry.entry.cb_fn = io_wait_cb;
	io_wait_entry.entry.cb_arg = &io_wait_entry;
	io_wait_entry.io_ch = io_ch;
	io_wait_entry.desc = desc;
	io_wait_entry.submitted = false;
	/* Cannot use the same io_wait_entry for two different calls. */
	memcpy(&io_wait_entry2, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry2.entry.cb_arg = &io_wait_entry2;

	/* Queue two I/O waits. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry.submitted == false);
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry2.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry2.submitted == false);

	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry.submitted == true);
	CU_ASSERT(io_wait_entry2.submitted == false);

	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry2.submitted == true);

	stub_complete_io(4);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	ut_fini_bdev();
}

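/*
 * Exercise bdev_io_should_split() directly against the boundary, segment-size,
 * segment-count and write-unit constraints, without submitting any I/O.
 */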
static void
bdev_io_spans_split_test(void)
{
	struct spdk_bdev bdev;
	struct spdk_bdev_io bdev_io;
	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV];

	memset(&bdev, 0, sizeof(bdev));
	bdev_io.u.bdev.iovs = iov;

	bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
	bdev.optimal_io_boundary = 0;
	bdev.max_segment_size = 0;
	bdev.max_num_segments = 0;
	bdev_io.bdev = &bdev;

	/* bdev has no optimal_io_boundary, max_segment_size or max_num_segments set - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev.split_on_optimal_io_boundary = true;
	bdev.optimal_io_boundary = 32;
	bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;

	/* RESETs are not based on LBAs - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io.u.bdev.offset_blocks = 0;
	bdev_io.u.bdev.num_blocks = 32;

	/* This I/O runs right up to, but does not cross, the boundary - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.u.bdev.num_blocks = 33;

	/* This I/O spans a boundary. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev_io.u.bdev.num_blocks = 32;
	bdev.max_segment_size = 512 * 32;
	bdev.max_num_segments = 1;
	bdev_io.u.bdev.iovcnt = 1;
	iov[0].iov_len = 512;

	/* Does not cross the boundary and does not exceed max_segment_size or max_num_segments */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev.split_on_optimal_io_boundary = false;
	bdev.max_segment_size = 512;
	bdev.max_num_segments = 1;
	bdev_io.u.bdev.iovcnt = 2;

	/* Exceeds max_num_segments */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev.max_num_segments = 2;
	iov[0].iov_len = 513;
	iov[1].iov_len = 512;

	/* Exceeds max_segment_size */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev.max_segment_size = 0;
	bdev.write_unit_size = 32;
	bdev.split_on_write_unit = true;
	bdev_io.type = SPDK_BDEV_IO_TYPE_WRITE;

	/* This I/O is one write unit */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.u.bdev.num_blocks = 32 * 2;

	/* This I/O is more than one write unit */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev_io.u.bdev.offset_blocks = 1;
	bdev_io.u.bdev.num_blocks = 32;

	/* This I/O is not aligned to write unit size */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
}

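/*
 * End-to-end splitting on the optimal I/O boundary: submit reads/writes through
 * the stub and verify each child I/O (offsets, iovecs and metadata pointers)
 * against the queued expectations.
 */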
static void
bdev_io_boundary_split_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
	struct ut_expected_io *expected_io;
	void *md_buf = (void *)0xFF000000;
	uint64_t i;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 512;
	bdev_opts.bdev_io_cache_size = 64;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	bdev->optimal_io_boundary = 16;
	bdev->split_on_optimal_io_boundary = false;

	g_io_done = false;

	/* First test that the I/O does not get split if split_on_optimal_io_boundary == false. */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 8, 1);
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 8 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	bdev->split_on_optimal_io_boundary = true;
	bdev->md_interleave = false;
	bdev->md_len = 8;

	/* Now test that a single-vector command is split correctly.
	 * Offset 14, length 8, payload 0xF000
	 *  Child - Offset 14, length 2, payload 0xF000
	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
	 *
	 * Set up the expected values before calling spdk_bdev_read_blocks
	 */
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
	expected_io->md_buf = md_buf;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
	expected_io->md_buf = md_buf + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* spdk_bdev_read_blocks will submit the first child immediately. */
	rc = spdk_bdev_read_blocks_with_md(desc, io_ch, (void *)0xF000, md_buf,
					   14, 8, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Now set up a more complex, multi-vector command that needs to be split,
	 *  including splitting iovecs.
	 */
	iov[0].iov_base = (void *)0x10000;
	iov[0].iov_len = 512;
	iov[1].iov_base = (void *)0x20000;
	iov[1].iov_len = 20 * 512;
	iov[2].iov_base = (void *)0x30000;
	iov[2].iov_len = 11 * 512;

	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 2);
	expected_io->md_buf = md_buf;
	ut_expected_io_set_iov(expected_io, 0, (void *)0x10000, 512);
	ut_expected_io_set_iov(expected_io, 1, (void *)0x20000, 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 1);
	expected_io->md_buf = md_buf + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 512), 16 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 2);
	expected_io->md_buf = md_buf + 18 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 17 * 512), 3 * 512);
	ut_expected_io_set_iov(expected_io, 1, (void *)0x30000, 11 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_writev_blocks_with_md(desc, io_ch, iov, 3, md_buf,
					     14, 32, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
	stub_complete_io(3);
	CU_ASSERT(g_io_done == true);

	/* Test a multi-vector command that needs to be split by the optimal I/O boundary
	 * and then split further due to the capacity of child iovs.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV * 2; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i, (void *)((i + 1) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   SPDK_BDEV_IO_NUM_CHILD_IOV, SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1 + SPDK_BDEV_IO_NUM_CHILD_IOV) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test a multi-vector command that needs to be split by the optimal I/O boundary
	 * and then split further due to the capacity of child iovs. In this case, the length
	 * of the remaining iovecs within an I/O boundary is a multiple of the block size.
	 */

	/* Fill the iovec array for exactly one boundary. The iovec count for this boundary
	 * is SPDK_BDEV_IO_NUM_CHILD_IOV + 1, which exceeds the capacity of child iovs.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}
	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 256;
	}
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 512;

	/* Add an extra iovec to trigger split */
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1, SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 512);
	}
	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 256);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 1,
					   1, 1);
	expected_io->md_buf = md_buf + (SPDK_BDEV_IO_NUM_CHILD_IOV - 1) * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   1, 1);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test a multi-vector command that needs to be split by the optimal I/O boundary
	 * and then split further due to the capacity of child iovs; the child request offset
	 * should be rewound to the last aligned offset and the I/O should succeed without error.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base = (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_len = 256;

	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 256;

	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	g_io_status = 0;
	/* The first expected I/O starts at offset 0 and spans SPDK_BDEV_IO_NUM_CHILD_IOV - 1 blocks */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 1);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	/* The second expected I/O starts at offset SPDK_BDEV_IO_NUM_CHILD_IOV - 1 and spans 1 block */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 1,
					   1, 2);
	expected_io->md_buf = md_buf + (SPDK_BDEV_IO_NUM_CHILD_IOV - 1) * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000), 256);
	ut_expected_io_set_iov(expected_io, 1,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000), 256);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	/* The third expected I/O starts at offset SPDK_BDEV_IO_NUM_CHILD_IOV and spans 1 block */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   1, 1);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test a multi-vector command that needs to be split due to the IO boundary and
	 * the capacity of child iovs. In particular, test the case where the command is
	 * split due to the capacity of child iovs and the tail address is not aligned with
	 * the block size, so it is rewound to the aligned address.
	 *
	 * The iovecs used in the read request are complex but are based on data
	 * collected from a real issue. We change the base addresses but keep the lengths
	 * so as not to lose the credibility of the test.
1643 	 */
1644 	bdev->optimal_io_boundary = 128;
1645 	g_io_done = false;
1646 	g_io_status = 0;
1647 
1648 	for (i = 0; i < 31; i++) {
1649 		iov[i].iov_base = (void *)(0xFEED0000000 + (i << 20));
1650 		iov[i].iov_len = 1024;
1651 	}
1652 	iov[31].iov_base = (void *)0xFEED1F00000;
1653 	iov[31].iov_len = 32768;
1654 	iov[32].iov_base = (void *)0xFEED2000000;
1655 	iov[32].iov_len = 160;
1656 	iov[33].iov_base = (void *)0xFEED2100000;
1657 	iov[33].iov_len = 4096;
1658 	iov[34].iov_base = (void *)0xFEED2200000;
1659 	iov[34].iov_len = 4096;
1660 	iov[35].iov_base = (void *)0xFEED2300000;
1661 	iov[35].iov_len = 4096;
1662 	iov[36].iov_base = (void *)0xFEED2400000;
1663 	iov[36].iov_len = 4096;
1664 	iov[37].iov_base = (void *)0xFEED2500000;
1665 	iov[37].iov_len = 4096;
1666 	iov[38].iov_base = (void *)0xFEED2600000;
1667 	iov[38].iov_len = 4096;
1668 	iov[39].iov_base = (void *)0xFEED2700000;
1669 	iov[39].iov_len = 4096;
1670 	iov[40].iov_base = (void *)0xFEED2800000;
1671 	iov[40].iov_len = 4096;
1672 	iov[41].iov_base = (void *)0xFEED2900000;
1673 	iov[41].iov_len = 4096;
1674 	iov[42].iov_base = (void *)0xFEED2A00000;
1675 	iov[42].iov_len = 4096;
1676 	iov[43].iov_base = (void *)0xFEED2B00000;
1677 	iov[43].iov_len = 12288;
1678 	iov[44].iov_base = (void *)0xFEED2C00000;
1679 	iov[44].iov_len = 8192;
1680 	iov[45].iov_base = (void *)0xFEED2F00000;
1681 	iov[45].iov_len = 4096;
1682 	iov[46].iov_base = (void *)0xFEED3000000;
1683 	iov[46].iov_len = 4096;
1684 	iov[47].iov_base = (void *)0xFEED3100000;
1685 	iov[47].iov_len = 4096;
1686 	iov[48].iov_base = (void *)0xFEED3200000;
1687 	iov[48].iov_len = 24576;
1688 	iov[49].iov_base = (void *)0xFEED3300000;
1689 	iov[49].iov_len = 16384;
1690 	iov[50].iov_base = (void *)0xFEED3400000;
1691 	iov[50].iov_len = 12288;
1692 	iov[51].iov_base = (void *)0xFEED3500000;
1693 	iov[51].iov_len = 4096;
1694 	iov[52].iov_base = (void *)0xFEED3600000;
1695 	iov[52].iov_len = 4096;
1696 	iov[53].iov_base = (void *)0xFEED3700000;
1697 	iov[53].iov_len = 4096;
1698 	iov[54].iov_base = (void *)0xFEED3800000;
1699 	iov[54].iov_len = 28672;
1700 	iov[55].iov_base = (void *)0xFEED3900000;
1701 	iov[55].iov_len = 20480;
1702 	iov[56].iov_base = (void *)0xFEED3A00000;
1703 	iov[56].iov_len = 4096;
1704 	iov[57].iov_base = (void *)0xFEED3B00000;
1705 	iov[57].iov_len = 12288;
1706 	iov[58].iov_base = (void *)0xFEED3C00000;
1707 	iov[58].iov_len = 4096;
1708 	iov[59].iov_base = (void *)0xFEED3D00000;
1709 	iov[59].iov_len = 4096;
1710 	iov[60].iov_base = (void *)0xFEED3E00000;
1711 	iov[60].iov_len = 352;
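
	/* Sanity check: the 61 iovecs above total 278016 bytes, i.e. exactly
	 * 543 blocks of 512 bytes, matching the 543-block read issued below.
	 */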
1712 
	/* The 1st child IO must be from iov[0] to iov[31], split by the capacity
	 * of child iovs.
	 */
1716 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 126, 32);
1717 	expected_io->md_buf = md_buf;
1718 	for (i = 0; i < 32; i++) {
1719 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
1720 	}
1721 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1722 
1723 	/* The 2nd child IO must be from iov[32] to the first 864 bytes of iov[33]
1724 	 * split by the IO boundary requirement.
1725 	 */
1726 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 126, 2, 2);
1727 	expected_io->md_buf = md_buf + 126 * 8;
1728 	ut_expected_io_set_iov(expected_io, 0, iov[32].iov_base, iov[32].iov_len);
1729 	ut_expected_io_set_iov(expected_io, 1, iov[33].iov_base, 864);
1730 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1731 
1732 	/* The 3rd child IO must be from the remaining 3232 bytes of iov[33] to
1733 	 * the first 864 bytes of iov[46] split by the IO boundary requirement.
1734 	 */
1735 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 128, 128, 14);
1736 	expected_io->md_buf = md_buf + 128 * 8;
1737 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[33].iov_base + 864),
1738 			       iov[33].iov_len - 864);
1739 	ut_expected_io_set_iov(expected_io, 1, iov[34].iov_base, iov[34].iov_len);
1740 	ut_expected_io_set_iov(expected_io, 2, iov[35].iov_base, iov[35].iov_len);
1741 	ut_expected_io_set_iov(expected_io, 3, iov[36].iov_base, iov[36].iov_len);
1742 	ut_expected_io_set_iov(expected_io, 4, iov[37].iov_base, iov[37].iov_len);
1743 	ut_expected_io_set_iov(expected_io, 5, iov[38].iov_base, iov[38].iov_len);
1744 	ut_expected_io_set_iov(expected_io, 6, iov[39].iov_base, iov[39].iov_len);
1745 	ut_expected_io_set_iov(expected_io, 7, iov[40].iov_base, iov[40].iov_len);
1746 	ut_expected_io_set_iov(expected_io, 8, iov[41].iov_base, iov[41].iov_len);
1747 	ut_expected_io_set_iov(expected_io, 9, iov[42].iov_base, iov[42].iov_len);
1748 	ut_expected_io_set_iov(expected_io, 10, iov[43].iov_base, iov[43].iov_len);
1749 	ut_expected_io_set_iov(expected_io, 11, iov[44].iov_base, iov[44].iov_len);
1750 	ut_expected_io_set_iov(expected_io, 12, iov[45].iov_base, iov[45].iov_len);
1751 	ut_expected_io_set_iov(expected_io, 13, iov[46].iov_base, 864);
1752 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1753 
1754 	/* The 4th child IO must be from the remaining 3232 bytes of iov[46] to the
1755 	 * first 864 bytes of iov[52] split by the IO boundary requirement.
1756 	 */
1757 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 256, 128, 7);
1758 	expected_io->md_buf = md_buf + 256 * 8;
1759 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[46].iov_base + 864),
1760 			       iov[46].iov_len - 864);
1761 	ut_expected_io_set_iov(expected_io, 1, iov[47].iov_base, iov[47].iov_len);
1762 	ut_expected_io_set_iov(expected_io, 2, iov[48].iov_base, iov[48].iov_len);
1763 	ut_expected_io_set_iov(expected_io, 3, iov[49].iov_base, iov[49].iov_len);
1764 	ut_expected_io_set_iov(expected_io, 4, iov[50].iov_base, iov[50].iov_len);
1765 	ut_expected_io_set_iov(expected_io, 5, iov[51].iov_base, iov[51].iov_len);
1766 	ut_expected_io_set_iov(expected_io, 6, iov[52].iov_base, 864);
1767 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1768 
	/* The 5th child IO must be from the remaining 3232 bytes of iov[52] to
	 * the first 4960 bytes of iov[57] split by the IO boundary requirement.
	 */
1772 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 384, 128, 6);
1773 	expected_io->md_buf = md_buf + 384 * 8;
1774 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[52].iov_base + 864),
1775 			       iov[52].iov_len - 864);
1776 	ut_expected_io_set_iov(expected_io, 1, iov[53].iov_base, iov[53].iov_len);
1777 	ut_expected_io_set_iov(expected_io, 2, iov[54].iov_base, iov[54].iov_len);
1778 	ut_expected_io_set_iov(expected_io, 3, iov[55].iov_base, iov[55].iov_len);
1779 	ut_expected_io_set_iov(expected_io, 4, iov[56].iov_base, iov[56].iov_len);
1780 	ut_expected_io_set_iov(expected_io, 5, iov[57].iov_base, 4960);
1781 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1782 
	/* The 6th child IO must be from the remaining 7328 bytes of iov[57]
	 * to the first 3936 bytes of iov[59], split by the capacity of child iovs
	 * with the tail rewound to a block-aligned length.
	 */
1786 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 512, 30, 3);
1787 	expected_io->md_buf = md_buf + 512 * 8;
1788 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[57].iov_base + 4960),
1789 			       iov[57].iov_len - 4960);
1790 	ut_expected_io_set_iov(expected_io, 1, iov[58].iov_base, iov[58].iov_len);
1791 	ut_expected_io_set_iov(expected_io, 2, iov[59].iov_base, 3936);
1792 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1793 
1794 	/* The 7th child IO is from the remaining 160 bytes of iov[59] and iov[60]. */
1795 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 542, 1, 2);
1796 	expected_io->md_buf = md_buf + 542 * 8;
1797 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[59].iov_base + 3936),
1798 			       iov[59].iov_len - 3936);
1799 	ut_expected_io_set_iov(expected_io, 1, iov[60].iov_base, iov[60].iov_len);
1800 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1801 
1802 	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, 61, md_buf,
1803 					    0, 543, io_done, NULL);
1804 	CU_ASSERT(rc == 0);
1805 	CU_ASSERT(g_io_done == false);
1806 
1807 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1808 	stub_complete_io(1);
1809 	CU_ASSERT(g_io_done == false);
1810 
1811 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 5);
1812 	stub_complete_io(5);
1813 	CU_ASSERT(g_io_done == false);
1814 
1815 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1816 	stub_complete_io(1);
1817 	CU_ASSERT(g_io_done == true);
1818 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
1819 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
1820 
1821 	/* Test a WRITE_ZEROES that would span an I/O boundary.  WRITE_ZEROES should not be
1822 	 * split, so test that.
1823 	 */
1824 	bdev->optimal_io_boundary = 15;
1825 	g_io_done = false;
1826 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 9, 36, 0);
1827 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1828 
1829 	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
1830 	CU_ASSERT(rc == 0);
1831 	CU_ASSERT(g_io_done == false);
1832 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1833 	stub_complete_io(1);
1834 	CU_ASSERT(g_io_done == true);
1835 
1836 	/* Test an UNMAP.  This should also not be split. */
1837 	bdev->optimal_io_boundary = 16;
1838 	g_io_done = false;
1839 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 15, 2, 0);
1840 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1841 
1842 	rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL);
1843 	CU_ASSERT(rc == 0);
1844 	CU_ASSERT(g_io_done == false);
1845 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1846 	stub_complete_io(1);
1847 	CU_ASSERT(g_io_done == true);
1848 
1849 	/* Test a FLUSH.  This should also not be split. */
1850 	bdev->optimal_io_boundary = 16;
1851 	g_io_done = false;
1852 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_FLUSH, 15, 2, 0);
1853 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1854 
1855 	rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL);
1856 	CU_ASSERT(rc == 0);
1857 	CU_ASSERT(g_io_done == false);
1858 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1859 	stub_complete_io(1);
1860 	CU_ASSERT(g_io_done == true);
1861 
1862 	/* Test a COPY.  This should also not be split. */
1863 	bdev->optimal_io_boundary = 15;
1864 	g_io_done = false;
1865 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 9, 45, 36);
1866 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1867 
1868 	rc = spdk_bdev_copy_blocks(desc, io_ch, 9, 45, 36, io_done, NULL);
1869 	CU_ASSERT(rc == 0);
1870 	CU_ASSERT(g_io_done == false);
1871 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1872 	stub_complete_io(1);
1873 	CU_ASSERT(g_io_done == true);
1874 
1875 	CU_ASSERT(TAILQ_EMPTY(&g_bdev_ut_channel->expected_io));
1876 
	/* Child requests return an error status */
1878 	bdev->optimal_io_boundary = 16;
1879 	iov[0].iov_base = (void *)0x10000;
1880 	iov[0].iov_len = 512 * 64;
1881 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
1882 	g_io_done = false;
1883 	g_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1884 
1885 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 1, 1, 64, io_done, NULL);
1886 	CU_ASSERT(rc == 0);
1887 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 5);
1888 	stub_complete_io(4);
1889 	CU_ASSERT(g_io_done == false);
1890 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
1891 	stub_complete_io(1);
1892 	CU_ASSERT(g_io_done == true);
1893 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
1894 
	/* Test that a multi vector command is terminated with failure before the
	 * splitting process continues when one of the child I/Os fails.
	 * The multi vector command is the same as the one above that needs to be split
	 * by the IO boundary and then split further due to the capacity of child iovs.
	 */
1900 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
1901 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
1902 		iov[i].iov_len = 512;
1903 	}
1904 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base = (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000);
1905 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_len = 256;
1906 
1907 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
1908 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 256;
1909 
1910 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
1911 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;
1912 
1913 	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
1914 
1915 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
1916 	g_io_done = false;
1917 	g_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1918 
1919 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, 0,
1920 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
1921 	CU_ASSERT(rc == 0);
1922 	CU_ASSERT(g_io_done == false);
1923 
1924 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1925 	stub_complete_io(1);
1926 	CU_ASSERT(g_io_done == true);
1927 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
1928 
1929 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1930 
	/* For this test we will create the following conditions to hit the code path where
	 * we are trying to send an IO following a split that has no iovs because we had to
	 * trim them for alignment reasons.
	 *
	 * - 16K boundary, our IO will start at offset 0 with a length of 0x4200
	 * - Our IOVs are 0x212 in size so that we run into the 16K boundary at child IOV
	 *   position 30 and overshoot by 0x2e.
	 * - That means we'll send the IO and loop back to pick up the remaining bytes at
	 *   child IOV index 31. When we do, we find that we have to shorten index 31 by 0x2e
	 *   which eliminates that vector so we just send the first split IO with 30 vectors
	 *   and let the completion pick up the last 2 vectors.
	 */
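	/* Overshoot arithmetic: 31 IOVs of 0x212 bytes total 31 * 0x212 = 0x402e,
	 * which exceeds the 16K (0x4000) boundary by 0x2e, so child iov index 30
	 * is trimmed from 0x212 to 0x1e4.
	 */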
1943 	bdev->optimal_io_boundary = 32;
1944 	bdev->split_on_optimal_io_boundary = true;
1945 	g_io_done = false;
1946 
1947 	/* Init all parent IOVs to 0x212 */
1948 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
1949 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
1950 		iov[i].iov_len = 0x212;
1951 	}
1952 
1953 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, SPDK_BDEV_IO_NUM_CHILD_IOV,
1954 					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1);
1955 	/* expect 0-29 to be 1:1 with the parent iov */
1956 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
1957 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
1958 	}
1959 
	/* expect index 30 to be shortened to 0x1e4 (0x212 - 0x2e) because of the alignment
	 * where 0x2e is the amount we overshot the 16K boundary
	 */
1963 	ut_expected_io_set_iov(expected_io, SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
1964 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 2].iov_base), 0x1e4);
1965 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1966 
	/* The 2nd child IO will have 2 remaining vectors: one picking up from the vector
	 * that was shortened, taking us to the next boundary, and then a final one to get
	 * us to 0x4200 bytes for the IO.
	 */
1971 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   1, 2);
	/* position 30 picks up the remaining bytes to the next boundary */
1974 	ut_expected_io_set_iov(expected_io, 0,
1975 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 2].iov_base + 0x1e4), 0x2e);
1976 
	/* position 31 picks up the rest of the transfer to get us to 0x4200 */
1978 	ut_expected_io_set_iov(expected_io, 1,
1979 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base), 0x1d2);
1980 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1981 
1982 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, 0,
1983 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
1984 	CU_ASSERT(rc == 0);
1985 	CU_ASSERT(g_io_done == false);
1986 
1987 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1988 	stub_complete_io(1);
1989 	CU_ASSERT(g_io_done == false);
1990 
1991 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1992 	stub_complete_io(1);
1993 	CU_ASSERT(g_io_done == true);
1994 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
1995 
1996 	spdk_put_io_channel(io_ch);
1997 	spdk_bdev_close(desc);
1998 	free_bdev(bdev);
1999 	ut_fini_bdev();
2000 }
2001 
2002 static void
2003 bdev_io_max_size_and_segment_split_test(void)
2004 {
2005 	struct spdk_bdev *bdev;
2006 	struct spdk_bdev_desc *desc = NULL;
2007 	struct spdk_io_channel *io_ch;
2008 	struct spdk_bdev_opts bdev_opts = {};
2009 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
2010 	struct ut_expected_io *expected_io;
2011 	uint64_t i;
2012 	int rc;
2013 
2014 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2015 	bdev_opts.bdev_io_pool_size = 512;
2016 	bdev_opts.bdev_io_cache_size = 64;
2017 	bdev_opts.opts_size = sizeof(bdev_opts);
2018 	ut_init_bdev(&bdev_opts);
2019 
2020 	bdev = allocate_bdev("bdev0");
2021 
2022 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2023 	CU_ASSERT(rc == 0);
2024 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2025 	io_ch = spdk_bdev_get_io_channel(desc);
2026 	CU_ASSERT(io_ch != NULL);
2027 
2028 	bdev->split_on_optimal_io_boundary = false;
2029 	bdev->optimal_io_boundary = 0;
2030 
	/* Case 0: max_num_segments == 0,
	 * but the segment size 2 * 512 exceeds max_segment_size (512),
	 * so the buffer is split into two 512-byte iov entries.
	 */
2034 	bdev->max_segment_size = 512;
2035 	bdev->max_num_segments = 0;
2036 	g_io_done = false;
2037 
2038 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 2);
2039 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2040 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 512), 512);
2041 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2042 
2043 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 2, io_done, NULL);
2044 	CU_ASSERT(rc == 0);
2045 	CU_ASSERT(g_io_done == false);
2046 
2047 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2048 	stub_complete_io(1);
2049 	CU_ASSERT(g_io_done == true);
2050 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2051 
	/* Case 1: max_segment_size == 0,
	 * but the iov count 2 exceeds max_num_segments (1),
	 * so the request is split into two single-iov child IOs.
	 */
2055 	bdev->max_segment_size = 0;
2056 	bdev->max_num_segments = 1;
2057 	g_io_done = false;
2058 
2059 	iov[0].iov_base = (void *)0x10000;
2060 	iov[0].iov_len = 512;
2061 	iov[1].iov_base = (void *)0x20000;
2062 	iov[1].iov_len = 8 * 512;
2063 
2064 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 1, 1);
2065 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, iov[0].iov_len);
2066 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2067 
2068 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 8, 1);
2069 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base, iov[1].iov_len);
2070 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2071 
2072 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 2, 14, 9, io_done, NULL);
2073 	CU_ASSERT(rc == 0);
2074 	CU_ASSERT(g_io_done == false);
2075 
2076 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2077 	stub_complete_io(2);
2078 	CU_ASSERT(g_io_done == true);
2079 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2080 
2081 	/* Test that a non-vector command is split correctly.
2082 	 * Set up the expected values before calling spdk_bdev_read_blocks
2083 	 */
2084 	bdev->max_segment_size = 512;
2085 	bdev->max_num_segments = 1;
2086 	g_io_done = false;
2087 
2088 	/* Child IO 0 */
2089 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 1, 1);
2090 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2091 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2092 
2093 	/* Child IO 1 */
2094 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 1, 1);
2095 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 1 * 512), 512);
2096 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2097 
	/* spdk_bdev_read_blocks will submit both children immediately. */
2099 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 2, io_done, NULL);
2100 	CU_ASSERT(rc == 0);
2101 	CU_ASSERT(g_io_done == false);
2102 
2103 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2104 	stub_complete_io(2);
2105 	CU_ASSERT(g_io_done == true);
2106 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2107 
2108 	/* Now set up a more complex, multi-vector command that needs to be split,
2109 	 * including splitting iovecs.
2110 	 */
2111 	bdev->max_segment_size = 2 * 512;
2112 	bdev->max_num_segments = 1;
2113 	g_io_done = false;
2114 
2115 	iov[0].iov_base = (void *)0x10000;
2116 	iov[0].iov_len = 2 * 512;
2117 	iov[1].iov_base = (void *)0x20000;
2118 	iov[1].iov_len = 4 * 512;
2119 	iov[2].iov_base = (void *)0x30000;
2120 	iov[2].iov_len = 6 * 512;
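
	/* Totals: 2 + 4 + 6 = 12 blocks.  With max_segment_size = 2 * 512 and
	 * max_num_segments = 1, every child IO carries exactly one 2-block
	 * segment, so 6 child IOs are expected.
	 */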
2121 
2122 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
2123 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512 * 2);
2124 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2125 
	/* iov[1] is split into 2 max_segment_size entries, each in its own child IO */
2127 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 2, 1);
2128 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base, 512 * 2);
2129 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2130 
2131 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 18, 2, 1);
2132 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base + 512 * 2, 512 * 2);
2133 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2134 
	/* iov[2] is split into 3 max_segment_size entries, each in its own child IO */
2136 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 20, 2, 1);
2137 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base, 512 * 2);
2138 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2139 
2140 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 22, 2, 1);
2141 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 2, 512 * 2);
2142 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2143 
2144 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 24, 2, 1);
2145 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 4, 512 * 2);
2146 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2147 
2148 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 14, 12, io_done, NULL);
2149 	CU_ASSERT(rc == 0);
2150 	CU_ASSERT(g_io_done == false);
2151 
2152 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 6);
2153 	stub_complete_io(6);
2154 	CU_ASSERT(g_io_done == true);
2155 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2156 
	/* Test a multi vector command that needs to be split by max_segment_size and then
	 * needs to be split further due to the capacity of the parent IO's child iovs.
	 */
2160 	bdev->max_segment_size = 512;
2161 	bdev->max_num_segments = 1;
2162 	g_io_done = false;
2163 
2164 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2165 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2166 		iov[i].iov_len = 512 * 2;
2167 	}
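
	/* Totals: SPDK_BDEV_IO_NUM_CHILD_IOV iovs of 2 blocks each = 64 blocks.
	 * Every 512-byte half becomes its own single-segment child IO, so 64
	 * child IOs are generated and submitted in two batches of 32, limited by
	 * the capacity of the parent's child iov entries.
	 */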
2168 
	/* Each input iov entry is split into 2 single-block child IOs;
	 * half of the input iovs are enough to consume all child iov entries of one round.
	 */
2172 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV / 2; i++) {
2173 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 2 * i, 1, 1);
2174 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, 512);
2175 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2176 
2177 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 2 * i + 1, 1, 1);
2178 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base + 512, 512);
2179 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2180 	}
2181 
	/* The remaining iovs are split in the second round */
2183 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV / 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2184 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 2, 1, 1);
2185 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, 512);
2186 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2187 
2188 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 2 + 1, 1, 1);
2189 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base + 512, 512);
2190 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2191 	}
2192 
2193 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV, 0,
2194 				    SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, NULL);
2195 	CU_ASSERT(rc == 0);
2196 	CU_ASSERT(g_io_done == false);
2197 
2198 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == SPDK_BDEV_IO_NUM_CHILD_IOV);
2199 	stub_complete_io(SPDK_BDEV_IO_NUM_CHILD_IOV);
2200 	CU_ASSERT(g_io_done == false);
2201 
2202 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == SPDK_BDEV_IO_NUM_CHILD_IOV);
2203 	stub_complete_io(SPDK_BDEV_IO_NUM_CHILD_IOV);
2204 	CU_ASSERT(g_io_done == true);
2205 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2206 
	/* An error case: a child IO produced by the split is
	 * not a multiple of the block size,
	 * so the splitting exits with an error.
	 */
2211 	bdev->max_segment_size = 512;
2212 	bdev->max_num_segments = 1;
2213 	g_io_done = false;
2214 
2215 	iov[0].iov_base = (void *)0x10000;
2216 	iov[0].iov_len = 512 + 256;
2217 	iov[1].iov_base = (void *)0x20000;
2218 	iov[1].iov_len = 256;
2219 
	/* iov[0] is split into 512 and 256.
	 * 256 is less than a block size; the next round of splitting
	 * finds that its first child IO is smaller than the block size,
	 * so it exits with an error.
	 */
2225 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 1, 1);
2226 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512);
2227 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2228 
2229 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 2, 0, 2, io_done, NULL);
2230 	CU_ASSERT(rc == 0);
2231 	CU_ASSERT(g_io_done == false);
2232 
2233 	/* First child IO is OK */
2234 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2235 	stub_complete_io(1);
2236 	CU_ASSERT(g_io_done == true);
2237 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2238 
2239 	/* error exit */
2240 	stub_complete_io(1);
2241 	CU_ASSERT(g_io_done == true);
2242 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
2243 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2244 
	/* Test a multi vector command that needs to be split by max_segment_size and then
	 * needs to be split further due to the capacity of child iovs.
	 *
	 * In this case, the last two iovs need to be split, but that would exceed the
	 * capacity of child iovs, so they must wait until the first batch completes.
	 */
2251 	bdev->max_segment_size = 512;
2252 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2253 	g_io_done = false;
2254 
2255 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2256 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2257 		iov[i].iov_len = 512;
2258 	}
2259 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2260 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2261 		iov[i].iov_len = 512 * 2;
2262 	}
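
	/* Totals: 30 one-block iovs plus 2 two-block iovs = 34 blocks, matching
	 * the SPDK_BDEV_IO_NUM_CHILD_IOV + 2 blocks read below.  The first child
	 * consumes all 32 child iov entries (30 unsplit entries plus 2 from
	 * splitting the second-to-last iov), leaving the last iov for the second
	 * child IO.
	 */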
2263 
2264 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
2265 					   SPDK_BDEV_IO_NUM_CHILD_IOV, SPDK_BDEV_IO_NUM_CHILD_IOV);
	/* Entries 0 through SPDK_BDEV_IO_NUM_CHILD_IOV - 3 will not be split */
2267 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2268 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
2269 	}
2270 	/* (SPDK_BDEV_IO_NUM_CHILD_IOV - 2) is split */
2271 	ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, 512);
2272 	ut_expected_io_set_iov(expected_io, i + 1, iov[i].iov_base + 512, 512);
2273 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2274 
	/* The child iov entries would exceed the parent IO's capacity, so the last iov is split in the next round */
2276 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV, 2, 2);
2277 	ut_expected_io_set_iov(expected_io, 0, iov[i + 1].iov_base, 512);
2278 	ut_expected_io_set_iov(expected_io, 1, iov[i + 1].iov_base + 512, 512);
2279 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2280 
2281 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV, 0,
2282 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 2, io_done, NULL);
2283 	CU_ASSERT(rc == 0);
2284 	CU_ASSERT(g_io_done == false);
2285 
2286 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2287 	stub_complete_io(1);
2288 	CU_ASSERT(g_io_done == false);
2289 
2290 	/* Next round */
2291 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2292 	stub_complete_io(1);
2293 	CU_ASSERT(g_io_done == true);
2294 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2295 
	/* This case is similar to the previous one, but the IO composed of
	 * the last few child iov entries is not large enough for a full block,
	 * so those entries cannot be put into this IO and must wait for the next round.
	 */
2300 	bdev->max_segment_size = 512;
2301 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2302 	g_io_done = false;
2303 
2304 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2305 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2306 		iov[i].iov_len = 512;
2307 	}
2308 
2309 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
2310 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2311 		iov[i].iov_len = 128;
2312 	}
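
	/* Totals: 30 * 512 + 4 * 128 = 15872 bytes = 31 blocks, matching the
	 * SPDK_BDEV_IO_NUM_CHILD_IOV - 1 blocks read below.  The four 128-byte
	 * iovs together form the final block and need 4 child iov entries.
	 */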
2313 
	/* The first child iovcnt isn't SPDK_BDEV_IO_NUM_CHILD_IOV but SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
	 * because the remaining 2 iovs are not enough to fill a block.
	 */
2317 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
2318 					   SPDK_BDEV_IO_NUM_CHILD_IOV - 2, SPDK_BDEV_IO_NUM_CHILD_IOV - 2);
2319 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2320 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
2321 	}
2322 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2323 
	/* The second child IO waits until the first child IO completes before executing,
	 * because the combined iovcnt of the two IOs exceeds the child iovcnt of the parent IO.
	 * It covers iov entries SPDK_BDEV_IO_NUM_CHILD_IOV - 2 through SPDK_BDEV_IO_NUM_CHILD_IOV + 1.
	 */
2328 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
2329 					   1, 4);
2330 	ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, iov[i].iov_len);
2331 	ut_expected_io_set_iov(expected_io, 1, iov[i + 1].iov_base, iov[i + 1].iov_len);
2332 	ut_expected_io_set_iov(expected_io, 2, iov[i + 2].iov_base, iov[i + 2].iov_len);
2333 	ut_expected_io_set_iov(expected_io, 3, iov[i + 3].iov_base, iov[i + 3].iov_len);
2334 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2335 
2336 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, 0,
2337 				    SPDK_BDEV_IO_NUM_CHILD_IOV - 1, io_done, NULL);
2338 	CU_ASSERT(rc == 0);
2339 	CU_ASSERT(g_io_done == false);
2340 
2341 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2342 	stub_complete_io(1);
2343 	CU_ASSERT(g_io_done == false);
2344 
2345 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2346 	stub_complete_io(1);
2347 	CU_ASSERT(g_io_done == true);
2348 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2349 
	/* A very complicated case.  Each sg entry exceeds max_segment_size and
	 * needs to be split.  In addition, each child IO must be a multiple of
	 * blocklen, and the child iovcnt exceeds the parent's iovcnt.
	 */
2354 	bdev->max_segment_size = 512 + 128;
2355 	bdev->max_num_segments = 3;
2356 	g_io_done = false;
2357 
2358 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2359 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2360 		iov[i].iov_len = 512 + 256;
2361 	}
2362 
2363 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
2364 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2365 		iov[i].iov_len = 512 + 128;
2366 	}
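
	/* Totals: 30 * (512 + 256) + 4 * (512 + 128) = 25600 bytes = 50 blocks,
	 * matching the 50-block read issued below.
	 */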
2367 
	/* Each for() round generates 3 child IOs and consumes 9 child iov entries
	 * (3 * 9 = 27 in total), 4 parent IO iov entries, and 6 blocks.
	 * 9 child IOs are generated across the 3 rounds.
	 */
2372 	for (i = 0; i < 3; i++) {
2373 		uint32_t j = i * 4;
2374 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6, 2, 3);
2375 		ut_expected_io_set_iov(expected_io, 0, iov[j].iov_base, 640);
2376 		ut_expected_io_set_iov(expected_io, 1, iov[j].iov_base + 640, 128);
2377 		ut_expected_io_set_iov(expected_io, 2, iov[j + 1].iov_base, 256);
2378 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2379 
		/* Each child IO must be a multiple of blocklen, so
		 * iov[j + 2] must be split.  If the third entry were also added,
		 * the multiple-of-blocklen requirement could not be guaranteed.
		 * But it still occupies one entry of the parent's child iov array.
		 */
2385 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6 + 2, 2, 2);
2386 		ut_expected_io_set_iov(expected_io, 0, iov[j + 1].iov_base + 256, 512);
2387 		ut_expected_io_set_iov(expected_io, 1, iov[j + 2].iov_base, 512);
2388 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2389 
2390 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6 + 4, 2, 3);
2391 		ut_expected_io_set_iov(expected_io, 0, iov[j + 2].iov_base + 512, 256);
2392 		ut_expected_io_set_iov(expected_io, 1, iov[j + 3].iov_base, 640);
2393 		ut_expected_io_set_iov(expected_io, 2, iov[j + 3].iov_base + 640, 128);
2394 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2395 	}
2396 
	/* The child iov position is at 27 for the 10th child IO.
	 * The parent iov entry index is 3 * 4 = 12 and the block offset is 3 * 6 = 18.
	 */
2400 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 18, 2, 3);
2401 	ut_expected_io_set_iov(expected_io, 0, iov[12].iov_base, 640);
2402 	ut_expected_io_set_iov(expected_io, 1, iov[12].iov_base + 640, 128);
2403 	ut_expected_io_set_iov(expected_io, 2, iov[13].iov_base, 256);
2404 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2405 
2406 	/* Child iov position at 30, the 11th child IO */
2407 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 20, 2, 2);
2408 	ut_expected_io_set_iov(expected_io, 0, iov[13].iov_base + 256, 512);
2409 	ut_expected_io_set_iov(expected_io, 1, iov[14].iov_base, 512);
2410 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2411 
2412 	/* The 2nd split round and iovpos is 0, the 12th child IO */
2413 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 22, 2, 3);
2414 	ut_expected_io_set_iov(expected_io, 0, iov[14].iov_base + 512, 256);
2415 	ut_expected_io_set_iov(expected_io, 1, iov[15].iov_base, 640);
2416 	ut_expected_io_set_iov(expected_io, 2, iov[15].iov_base + 640, 128);
2417 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2418 
	/* Generate 9 more child IOs consuming 27 child iov entries.
	 * Each for() round consumes 4 parent IO iov entries and 6 blocks.
	 * The parent IO iov index starts from 16 and the block offset starts from 24.
	 */
2423 	for (i = 0; i < 3; i++) {
2424 		uint32_t j = i * 4 + 16;
2425 		uint32_t offset = i * 6 + 24;
2426 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, 2, 3);
2427 		ut_expected_io_set_iov(expected_io, 0, iov[j].iov_base, 640);
2428 		ut_expected_io_set_iov(expected_io, 1, iov[j].iov_base + 640, 128);
2429 		ut_expected_io_set_iov(expected_io, 2, iov[j + 1].iov_base, 256);
2430 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2431 
		/* Each child IO must be a multiple of blocklen, so
		 * iov[j + 2] must be split.  If the third entry were also added,
		 * the multiple-of-blocklen requirement could not be guaranteed.
		 * But it still occupies one entry of the parent's child iov array.
		 */
2437 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset + 2, 2, 2);
2438 		ut_expected_io_set_iov(expected_io, 0, iov[j + 1].iov_base + 256, 512);
2439 		ut_expected_io_set_iov(expected_io, 1, iov[j + 2].iov_base, 512);
2440 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2441 
2442 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset + 4, 2, 3);
2443 		ut_expected_io_set_iov(expected_io, 0, iov[j + 2].iov_base + 512, 256);
2444 		ut_expected_io_set_iov(expected_io, 1, iov[j + 3].iov_base, 640);
2445 		ut_expected_io_set_iov(expected_io, 2, iov[j + 3].iov_base + 640, 128);
2446 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2447 	}
2448 
	/* The 22nd child IO, child iov position at 30 */
2450 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 42, 1, 1);
2451 	ut_expected_io_set_iov(expected_io, 0, iov[28].iov_base, 512);
2452 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2453 
2454 	/* The third round */
	/* Here is the 23rd child IO and the child iovpos is 0 */
2456 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 43, 2, 3);
2457 	ut_expected_io_set_iov(expected_io, 0, iov[28].iov_base + 512, 256);
2458 	ut_expected_io_set_iov(expected_io, 1, iov[29].iov_base, 640);
2459 	ut_expected_io_set_iov(expected_io, 2, iov[29].iov_base + 640, 128);
2460 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2461 
2462 	/* The 24th child IO */
2463 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 45, 3, 3);
2464 	ut_expected_io_set_iov(expected_io, 0, iov[30].iov_base, 640);
2465 	ut_expected_io_set_iov(expected_io, 1, iov[31].iov_base, 640);
2466 	ut_expected_io_set_iov(expected_io, 2, iov[32].iov_base, 256);
2467 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2468 
2469 	/* The 25th child IO */
2470 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 48, 2, 2);
2471 	ut_expected_io_set_iov(expected_io, 0, iov[32].iov_base + 256, 384);
2472 	ut_expected_io_set_iov(expected_io, 1, iov[33].iov_base, 640);
2473 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2474 
2475 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, 0,
2476 				    50, io_done, NULL);
2477 	CU_ASSERT(rc == 0);
2478 	CU_ASSERT(g_io_done == false);
2479 
	/* The parent IO supports up to 32 child iovs, so at most
	 * 11 child IOs can be split at a time; splitting
	 * continues after the first batch completes.
	 */
2484 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 11);
2485 	stub_complete_io(11);
2486 	CU_ASSERT(g_io_done == false);
2487 
2488 	/* The 2nd round */
2489 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 11);
2490 	stub_complete_io(11);
2491 	CU_ASSERT(g_io_done == false);
2492 
2493 	/* The last round */
2494 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2495 	stub_complete_io(3);
2496 	CU_ASSERT(g_io_done == true);
2497 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2498 
	/* Test a WRITE_ZEROES.  This should also not be split. */
2500 	bdev->max_segment_size = 512;
2501 	bdev->max_num_segments = 1;
2502 	g_io_done = false;
2503 
2504 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 9, 36, 0);
2505 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2506 
2507 	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
2508 	CU_ASSERT(rc == 0);
2509 	CU_ASSERT(g_io_done == false);
2510 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2511 	stub_complete_io(1);
2512 	CU_ASSERT(g_io_done == true);
2513 
2514 	/* Test an UNMAP.  This should also not be split. */
2515 	g_io_done = false;
2516 
2517 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 15, 4, 0);
2518 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2519 
2520 	rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 4, io_done, NULL);
2521 	CU_ASSERT(rc == 0);
2522 	CU_ASSERT(g_io_done == false);
2523 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2524 	stub_complete_io(1);
2525 	CU_ASSERT(g_io_done == true);
2526 
2527 	/* Test a FLUSH.  This should also not be split. */
2528 	g_io_done = false;
2529 
2530 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_FLUSH, 15, 4, 0);
2531 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2532 
	rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 4, io_done, NULL);
2534 	CU_ASSERT(rc == 0);
2535 	CU_ASSERT(g_io_done == false);
2536 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2537 	stub_complete_io(1);
2538 	CU_ASSERT(g_io_done == true);
2539 
2540 	/* Test a COPY.  This should also not be split. */
2541 	g_io_done = false;
2542 
2543 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 9, 45, 36);
2544 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2545 
2546 	rc = spdk_bdev_copy_blocks(desc, io_ch, 9, 45, 36, io_done, NULL);
2547 	CU_ASSERT(rc == 0);
2548 	CU_ASSERT(g_io_done == false);
2549 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2550 	stub_complete_io(1);
2551 	CU_ASSERT(g_io_done == true);
2552 
2553 	spdk_put_io_channel(io_ch);
2554 	spdk_bdev_close(desc);
2555 	free_bdev(bdev);
2556 	ut_fini_bdev();
2557 }
2558 
2559 static void
2560 bdev_io_mix_split_test(void)
2561 {
2562 	struct spdk_bdev *bdev;
2563 	struct spdk_bdev_desc *desc = NULL;
2564 	struct spdk_io_channel *io_ch;
2565 	struct spdk_bdev_opts bdev_opts = {};
2566 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
2567 	struct ut_expected_io *expected_io;
2568 	uint64_t i;
2569 	int rc;
2570 
2571 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2572 	bdev_opts.bdev_io_pool_size = 512;
2573 	bdev_opts.bdev_io_cache_size = 64;
2574 	ut_init_bdev(&bdev_opts);
2575 
2576 	bdev = allocate_bdev("bdev0");
2577 
2578 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2579 	CU_ASSERT(rc == 0);
2580 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2581 	io_ch = spdk_bdev_get_io_channel(desc);
2582 	CU_ASSERT(io_ch != NULL);
2583 
2584 	/* First case optimal_io_boundary == max_segment_size * max_num_segments */
2585 	bdev->split_on_optimal_io_boundary = true;
2586 	bdev->optimal_io_boundary = 16;
2587 
2588 	bdev->max_segment_size = 512;
2589 	bdev->max_num_segments = 16;
2590 	g_io_done = false;
2591 
	/* An IO crossing the IO boundary requires a split.
	 * Total: 2 child IOs.
	 */
2595 
	/* The 1st child IO is split into multiple segment entries by max_segment_size */
2597 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 2);
2598 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2599 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 512), 512);
2600 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2601 
	/* The 2nd child IO is split into multiple segment entries by max_segment_size */
2603 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 2);
2604 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 512);
2605 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 3 * 512), 512);
2606 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2607 
2608 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 4, io_done, NULL);
2609 	CU_ASSERT(rc == 0);
2610 	CU_ASSERT(g_io_done == false);
2611 
2612 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2613 	stub_complete_io(2);
2614 	CU_ASSERT(g_io_done == true);
2615 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2616 
2617 	/* Second case optimal_io_boundary > max_segment_size * max_num_segments */
2618 	bdev->max_segment_size = 15 * 512;
2619 	bdev->max_num_segments = 1;
2620 	g_io_done = false;
2621 
	/* An IO crossing the IO boundary requires a split.
	 * The 1st child IO's segment size exceeds max_segment_size,
	 * so the 1st child IO is split into multiple segment entries.
	 * It then splits into 2 child IOs because of max_num_segments.
	 * Total: 3 child IOs.
	 */
2628 
	/* The first 2 child IOs are within one IO boundary.
	 * Because optimal_io_boundary > max_segment_size * max_num_segments,
	 * the boundary chunk is split into these first 2 child IOs.
	 */
2633 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 15, 1);
2634 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512 * 15);
2635 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2636 
2637 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 1, 1);
2638 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 15), 512);
2639 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2640 
	/* The 3rd child IO exists because of the IO boundary */
2642 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 1);
2643 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 16), 512 * 2);
2644 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2645 
2646 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 0, 18, io_done, NULL);
2647 	CU_ASSERT(rc == 0);
2648 	CU_ASSERT(g_io_done == false);
2649 
2650 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2651 	stub_complete_io(3);
2652 	CU_ASSERT(g_io_done == true);
2653 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2654 
2655 	/* Third case optimal_io_boundary < max_segment_size * max_num_segments */
2656 	bdev->max_segment_size = 17 * 512;
2657 	bdev->max_num_segments = 1;
2658 	g_io_done = false;
2659 
	/* An IO crossing the IO boundary requires a split.
	 * The child IOs do not need further splitting.
	 * Total: 2 child IOs.
	 */
2664 
2665 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 16, 1);
2666 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512 * 16);
2667 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2668 
2669 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 1);
2670 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 16), 512 * 2);
2671 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2672 
2673 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 0, 18, io_done, NULL);
2674 	CU_ASSERT(rc == 0);
2675 	CU_ASSERT(g_io_done == false);
2676 
2677 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2678 	stub_complete_io(2);
2679 	CU_ASSERT(g_io_done == true);
2680 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2681 
2682 	/* Now set up a more complex, multi-vector command that needs to be split,
2683 	 * including splitting iovecs.
2684 	 * optimal_io_boundary < max_segment_size * max_num_segments
2685 	 */
2686 	bdev->max_segment_size = 3 * 512;
2687 	bdev->max_num_segments = 6;
2688 	g_io_done = false;
2689 
2690 	iov[0].iov_base = (void *)0x10000;
2691 	iov[0].iov_len = 4 * 512;
2692 	iov[1].iov_base = (void *)0x20000;
2693 	iov[1].iov_len = 4 * 512;
2694 	iov[2].iov_base = (void *)0x30000;
2695 	iov[2].iov_len = 10 * 512;
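
	/* Expected split: iov[0] and iov[1] each contribute 3 + 1 blocks and
	 * iov[2] contributes 3 + 3 blocks, filling the 6-entry limit at 14
	 * blocks.  The remaining 4 blocks of iov[2] are then split at the
	 * 16-block boundary into two 2-block child IOs.
	 */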
2696 
	/* An IO crossing the IO boundary requires a split.
	 * The 1st child IO's segment size exceeds max_segment_size, and after
	 * splitting the segment size, the number of segments exceeds max_num_segments.
	 * So the 1st child IO is split into 2 child IOs.
	 * Total: 3 child IOs.
	 */
2703 
	/* The first 2 child IOs are within one IO boundary.
	 * After splitting the segment size, the segment count exceeds max_num_segments,
	 * so the boundary chunk splits into 2 child IOs.
	 */
2708 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 14, 6);
2709 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512 * 3);
2710 	ut_expected_io_set_iov(expected_io, 1, iov[0].iov_base + 512 * 3, 512);
2711 	ut_expected_io_set_iov(expected_io, 2, iov[1].iov_base, 512 * 3);
2712 	ut_expected_io_set_iov(expected_io, 3, iov[1].iov_base + 512 * 3, 512);
2713 	ut_expected_io_set_iov(expected_io, 4, iov[2].iov_base, 512 * 3);
2714 	ut_expected_io_set_iov(expected_io, 5, iov[2].iov_base + 512 * 3, 512 * 3);
2715 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2716 
	/* The 2nd child IO has the leftover segment entry */
2718 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
2719 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 6, 512 * 2);
2720 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2721 
2722 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 2, 1);
2723 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 8, 512 * 2);
2724 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2725 
2726 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 0, 18, io_done, NULL);
2727 	CU_ASSERT(rc == 0);
2728 	CU_ASSERT(g_io_done == false);
2729 
2730 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2731 	stub_complete_io(3);
2732 	CU_ASSERT(g_io_done == true);
2733 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2734 
	/* A very complicated case.  Each sg entry exceeds max_segment_size
	 * and is also split on the IO boundary.
	 * optimal_io_boundary < max_segment_size * max_num_segments
	 */
2739 	bdev->max_segment_size = 3 * 512;
2740 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2741 	g_io_done = false;
2742 
2743 	for (i = 0; i < 20; i++) {
2744 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2745 		iov[i].iov_len = 512 * 4;
2746 	}
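
	/* Totals: 20 iovs * 4 blocks = 80 blocks.  Each 16-block boundary chunk
	 * covers 4 parent iovs, and max_segment_size splits each iov into
	 * 3 + 1 blocks, so every child IO carries 8 child iov entries.
	 */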
2747 
	/* An IO crossing the IO boundary requires a split.
	 * The 80-block length splits into 5 child IOs based on the offset and IO boundary.
	 * Each iov entry needs to be split into 2 entries because of max_segment_size.
	 * Total: 5 child IOs.
	 */
2753 
	/* 4 iov entries fall within one IO boundary and each iov entry splits into 2.
	 * So each child IO occupies 8 child iov entries.
	 */
2757 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 16, 8);
2758 	for (i = 0; i < 4; i++) {
2759 		int iovcnt = i * 2;
2760 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2761 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2762 	}
2763 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2764 
	/* The 2nd child IO; 16 child iov entries of the parent IO consumed in total */
2766 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 8);
2767 	for (i = 4; i < 8; i++) {
2768 		int iovcnt = (i - 4) * 2;
2769 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2770 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2771 	}
2772 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2773 
	/* The 3rd child IO; 24 child iov entries of the parent IO consumed in total */
2775 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 16, 8);
2776 	for (i = 8; i < 12; i++) {
2777 		int iovcnt = (i - 8) * 2;
2778 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2779 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2780 	}
2781 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2782 
	/* The 4th child IO; 32 child iov entries of the parent IO consumed in total */
2784 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 48, 16, 8);
2785 	for (i = 12; i < 16; i++) {
2786 		int iovcnt = (i - 12) * 2;
2787 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2788 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2789 	}
2790 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2791 
	/* The 5th child IO; because of the child iov entry limit it is
	 * split in the next round.
	 */
2795 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 64, 16, 8);
2796 	for (i = 16; i < 20; i++) {
2797 		int iovcnt = (i - 16) * 2;
2798 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2799 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2800 	}
2801 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2802 
2803 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 20, 0, 80, io_done, NULL);
2804 	CU_ASSERT(rc == 0);
2805 	CU_ASSERT(g_io_done == false);
2806 
2807 	/* First split round */
2808 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
2809 	stub_complete_io(4);
2810 	CU_ASSERT(g_io_done == false);
2811 
2812 	/* Second split round */
2813 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2814 	stub_complete_io(1);
2815 	CU_ASSERT(g_io_done == true);
2816 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2817 
2818 	spdk_put_io_channel(io_ch);
2819 	spdk_bdev_close(desc);
2820 	free_bdev(bdev);
2821 	ut_fini_bdev();
2822 }
2823 
2824 static void
2825 bdev_io_split_with_io_wait(void)
2826 {
2827 	struct spdk_bdev *bdev;
2828 	struct spdk_bdev_desc *desc = NULL;
2829 	struct spdk_io_channel *io_ch;
2830 	struct spdk_bdev_channel *channel;
2831 	struct spdk_bdev_mgmt_channel *mgmt_ch;
2832 	struct spdk_bdev_opts bdev_opts = {};
2833 	struct iovec iov[3];
2834 	struct ut_expected_io *expected_io;
2835 	int rc;
2836 
2837 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2838 	bdev_opts.bdev_io_pool_size = 2;
2839 	bdev_opts.bdev_io_cache_size = 1;
2840 	ut_init_bdev(&bdev_opts);
2841 
2842 	bdev = allocate_bdev("bdev0");
2843 
2844 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
2845 	CU_ASSERT(rc == 0);
2846 	CU_ASSERT(desc != NULL);
2847 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
2848 	io_ch = spdk_bdev_get_io_channel(desc);
2849 	CU_ASSERT(io_ch != NULL);
2850 	channel = spdk_io_channel_get_ctx(io_ch);
2851 	mgmt_ch = channel->shared_resource->mgmt_ch;
2852 
2853 	bdev->optimal_io_boundary = 16;
2854 	bdev->split_on_optimal_io_boundary = true;
2855 
2856 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
2857 	CU_ASSERT(rc == 0);
2858 
2859 	/* Now test that a single-vector command is split correctly.
2860 	 * Offset 14, length 8, payload 0xF000
2861 	 *  Child - Offset 14, length 2, payload 0xF000
2862 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
2863 	 *
2864 	 * Set up the expected values before calling spdk_bdev_read_blocks
2865 	 */
2866 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
2867 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
2868 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2869 
2870 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
2871 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
2872 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2873 
2874 	/* The following children will be submitted sequentially due to the capacity of
2875 	 * spdk_bdev_io.
2876 	 */
2877 
2878 	/* The first child I/O will be queued to wait until an spdk_bdev_io becomes available */
2879 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL);
2880 	CU_ASSERT(rc == 0);
2881 	CU_ASSERT(!TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
2882 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2883 
2884 	/* Completing the first read I/O will submit the first child */
2885 	stub_complete_io(1);
2886 	CU_ASSERT(TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
2887 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2888 
2889 	/* Completing the first child will submit the second child */
2890 	stub_complete_io(1);
2891 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2892 
2893 	/* Complete the second child I/O.  This should result in our callback getting
2894 	 * invoked since the parent I/O is now complete.
2895 	 */
2896 	stub_complete_io(1);
2897 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2898 
2899 	/* Now set up a more complex, multi-vector command that needs to be split,
2900 	 *  including splitting iovecs.
2901 	 */
2902 	iov[0].iov_base = (void *)0x10000;
2903 	iov[0].iov_len = 512;
2904 	iov[1].iov_base = (void *)0x20000;
2905 	iov[1].iov_len = 20 * 512;
2906 	iov[2].iov_base = (void *)0x30000;
2907 	iov[2].iov_len = 11 * 512;
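
	/* Expected split of the 32-block write at offset 14: blocks 14-15 from
	 * iov[0] plus the first 512 bytes of iov[1], blocks 16-31 from the next
	 * 16 * 512 bytes of iov[1], and blocks 32-45 from the remaining
	 * 3 * 512 bytes of iov[1] plus all of iov[2].
	 */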
2908 
2909 	g_io_done = false;
2910 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 2);
2911 	ut_expected_io_set_iov(expected_io, 0, (void *)0x10000, 512);
2912 	ut_expected_io_set_iov(expected_io, 1, (void *)0x20000, 512);
2913 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2914 
2915 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 1);
2916 	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 512), 16 * 512);
2917 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2918 
2919 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 2);
2920 	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 17 * 512), 3 * 512);
2921 	ut_expected_io_set_iov(expected_io, 1, (void *)0x30000, 11 * 512);
2922 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
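	/* The 32-block write at offset 14 crosses boundaries at blocks 16 and 32,
	 * producing children of 2, 16 and 14 blocks.  iov[1] (20 * 512 bytes) is
	 * carved across all three: its first 512 bytes complete the first child,
	 * the next 16 * 512 bytes form the second, and the last 3 * 512 bytes
	 * start the third.
	 */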
2923 
2924 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 14, 32, io_done, NULL);
2925 	CU_ASSERT(rc == 0);
2926 	CU_ASSERT(g_io_done == false);
2927 
2928 	/* The following children will be submitted sequentially due to the limited
2929 	 * capacity of the spdk_bdev_io pool.
2930 	 */
2931 
2932 	/* Completing the first child will submit the second child */
2933 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2934 	stub_complete_io(1);
2935 	CU_ASSERT(g_io_done == false);
2936 
2937 	/* Completing the second child will submit the third child */
2938 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2939 	stub_complete_io(1);
2940 	CU_ASSERT(g_io_done == false);
2941 
2942 	/* Completing the third child will result in our callback getting invoked
2943 	 * since the parent I/O is now complete.
2944 	 */
2945 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2946 	stub_complete_io(1);
2947 	CU_ASSERT(g_io_done == true);
2948 
2949 	CU_ASSERT(TAILQ_EMPTY(&g_bdev_ut_channel->expected_io));
2950 
2951 	spdk_put_io_channel(io_ch);
2952 	spdk_bdev_close(desc);
2953 	free_bdev(bdev);
2954 	ut_fini_bdev();
2955 }
2956 
2957 static void
2958 bdev_io_write_unit_split_test(void)
2959 {
2960 	struct spdk_bdev *bdev;
2961 	struct spdk_bdev_desc *desc = NULL;
2962 	struct spdk_io_channel *io_ch;
2963 	struct spdk_bdev_opts bdev_opts = {};
2964 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 4];
2965 	struct ut_expected_io *expected_io;
2966 	uint64_t i;
2967 	int rc;
2968 
2969 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2970 	bdev_opts.bdev_io_pool_size = 512;
2971 	bdev_opts.bdev_io_cache_size = 64;
2972 	ut_init_bdev(&bdev_opts);
2973 
2974 	bdev = allocate_bdev("bdev0");
2975 
2976 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2977 	CU_ASSERT(rc == 0);
2978 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2979 	io_ch = spdk_bdev_get_io_channel(desc);
2980 	CU_ASSERT(io_ch != NULL);
2981 
2982 	/* Write I/O 2x larger than write_unit_size should get split into 2 I/Os */
2983 	bdev->write_unit_size = 32;
2984 	bdev->split_on_write_unit = true;
2985 	g_io_done = false;
2986 
2987 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 32, 1);
2988 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 32 * 512);
2989 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2990 
2991 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 32, 1);
2992 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 32 * 512), 32 * 512);
2993 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2994 
2995 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
2996 	CU_ASSERT(rc == 0);
2997 	CU_ASSERT(g_io_done == false);
2998 
2999 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3000 	stub_complete_io(2);
3001 	CU_ASSERT(g_io_done == true);
3002 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3003 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3004 
3005 	/* Same as above but with optimal_io_boundary < write_unit_size - the I/O should be split
3006 	 * based on write_unit_size, not optimal_io_boundary */
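	/* Splitting on the 16-block boundary would produce children smaller than
	 * the 32-block write unit, so write_unit_size takes precedence and the
	 * 64-block write is still split into two 32-block children.
	 */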
3007 	bdev->split_on_optimal_io_boundary = true;
3008 	bdev->optimal_io_boundary = 16;
3009 	g_io_done = false;
3010 
3011 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
3012 	CU_ASSERT(rc == 0);
3013 	CU_ASSERT(g_io_done == false);
3014 
3015 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3016 	stub_complete_io(2);
3017 	CU_ASSERT(g_io_done == true);
3018 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3019 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3020 
3021 	/* Write I/O should fail if it is smaller than write_unit_size */
3022 	g_io_done = false;
3023 
3024 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 31, io_done, NULL);
3025 	CU_ASSERT(rc == 0);
3026 	CU_ASSERT(g_io_done == false);
3027 
3028 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3029 	poll_threads();
3030 	CU_ASSERT(g_io_done == true);
3031 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3032 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3033 
3034 	/* Same for I/O not aligned to write_unit_size */
3035 	g_io_done = false;
3036 
3037 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 1, 32, io_done, NULL);
3038 	CU_ASSERT(rc == 0);
3039 	CU_ASSERT(g_io_done == false);
3040 
3041 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3042 	poll_threads();
3043 	CU_ASSERT(g_io_done == true);
3044 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3045 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3046 
3047 	/* Write should fail if it needs to be split but there are not enough iovs to submit
3048 	 * an entire write unit */
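	/* write_unit_size below is SPDK_COUNTOF(iov) / 2 = SPDK_BDEV_IO_NUM_CHILD_IOV * 2
	 * blocks, and each block is backed by its own 512-byte iovec, so one write
	 * unit needs twice the SPDK_BDEV_IO_NUM_CHILD_IOV iovecs a child I/O can carry.
	 */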
3049 	bdev->write_unit_size = SPDK_COUNTOF(iov) / 2;
3050 	g_io_done = false;
3051 
3052 	for (i = 0; i < SPDK_COUNTOF(iov); i++) {
3053 		iov[i].iov_base = (void *)(0x1000 + 512 * i);
3054 		iov[i].iov_len = 512;
3055 	}
3056 
3057 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, SPDK_COUNTOF(iov), 0, SPDK_COUNTOF(iov),
3058 				     io_done, NULL);
3059 	CU_ASSERT(rc == 0);
3060 	CU_ASSERT(g_io_done == false);
3061 
3062 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3063 	poll_threads();
3064 	CU_ASSERT(g_io_done == true);
3065 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3066 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3067 
3068 	spdk_put_io_channel(io_ch);
3069 	spdk_bdev_close(desc);
3070 	free_bdev(bdev);
3071 	ut_fini_bdev();
3072 }
3073 
3074 static void
3075 bdev_io_alignment(void)
3076 {
3077 	struct spdk_bdev *bdev;
3078 	struct spdk_bdev_desc *desc = NULL;
3079 	struct spdk_io_channel *io_ch;
3080 	struct spdk_bdev_opts bdev_opts = {};
3081 	int rc;
3082 	void *buf = NULL;
3083 	struct iovec iovs[2];
3084 	int iovcnt;
3085 	uint64_t alignment;
3086 
3087 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
3088 	bdev_opts.bdev_io_pool_size = 20;
3089 	bdev_opts.bdev_io_cache_size = 2;
3090 	ut_init_bdev(&bdev_opts);
3091 
3092 	fn_table.submit_request = stub_submit_request_get_buf;
3093 	bdev = allocate_bdev("bdev0");
3094 
3095 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
3096 	CU_ASSERT(rc == 0);
3097 	CU_ASSERT(desc != NULL);
3098 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3099 	io_ch = spdk_bdev_get_io_channel(desc);
3100 	CU_ASSERT(io_ch != NULL);
3101 
3102 	/* Create aligned buffer */
3103 	rc = posix_memalign(&buf, 4096, 8192);
3104 	SPDK_CU_ASSERT_FATAL(rc == 0);
3105 
3106 	/* Pass aligned single buffer with no alignment required */
3107 	alignment = 1;
3108 	bdev->required_alignment = spdk_u32log2(alignment);
3109 
3110 	rc = spdk_bdev_write_blocks(desc, io_ch, buf, 0, 1, io_done, NULL);
3111 	CU_ASSERT(rc == 0);
3112 	stub_complete_io(1);
3113 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3114 				    alignment));
3115 
3116 	rc = spdk_bdev_read_blocks(desc, io_ch, buf, 0, 1, io_done, NULL);
3117 	CU_ASSERT(rc == 0);
3118 	stub_complete_io(1);
3119 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3120 				    alignment));
3121 
3122 	/* Pass unaligned single buffer with no alignment required */
3123 	alignment = 1;
3124 	bdev->required_alignment = spdk_u32log2(alignment);
3125 
3126 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3127 	CU_ASSERT(rc == 0);
3128 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3129 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == buf + 4);
3130 	stub_complete_io(1);
3131 
3132 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3133 	CU_ASSERT(rc == 0);
3134 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3135 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == buf + 4);
3136 	stub_complete_io(1);
3137 
3138 	/* Pass unaligned single buffer with 512 alignment required */
3139 	alignment = 512;
3140 	bdev->required_alignment = spdk_u32log2(alignment);
3141 
3142 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3143 	CU_ASSERT(rc == 0);
3144 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3145 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3146 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3147 				    alignment));
3148 	stub_complete_io(1);
3149 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3150 
3151 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3152 	CU_ASSERT(rc == 0);
3153 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3154 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3155 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3156 				    alignment));
3157 	stub_complete_io(1);
3158 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
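	/* The buf + 4 payload violates the 512-byte alignment, so the bdev layer
	 * bounces it: u.bdev.iovs is redirected to internal.bounce_iov (an aligned
	 * copy) while orig_iovcnt remembers the caller's iovec count; completion
	 * restores the original iovecs and resets orig_iovcnt to 0.
	 */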
3159 
3160 	/* Pass unaligned single buffer with 4096 alignment required */
3161 	alignment = 4096;
3162 	bdev->required_alignment = spdk_u32log2(alignment);
3163 
3164 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 8, 0, 1, io_done, NULL);
3165 	CU_ASSERT(rc == 0);
3166 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3167 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3168 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3169 				    alignment));
3170 	stub_complete_io(1);
3171 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3172 
3173 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 8, 0, 1, io_done, NULL);
3174 	CU_ASSERT(rc == 0);
3175 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3176 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3177 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3178 				    alignment));
3179 	stub_complete_io(1);
3180 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3181 
3182 	/* Pass aligned iovs with no alignment required */
3183 	alignment = 1;
3184 	bdev->required_alignment = spdk_u32log2(alignment);
3185 
3186 	iovcnt = 1;
3187 	iovs[0].iov_base = buf;
3188 	iovs[0].iov_len = 512;
3189 
3190 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3191 	CU_ASSERT(rc == 0);
3192 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3193 	stub_complete_io(1);
3194 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3195 
3196 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3197 	CU_ASSERT(rc == 0);
3198 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3199 	stub_complete_io(1);
3200 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3201 
3202 	/* Pass unaligned iovs with no alignment required */
3203 	alignment = 1;
3204 	bdev->required_alignment = spdk_u32log2(alignment);
3205 
3206 	iovcnt = 2;
3207 	iovs[0].iov_base = buf + 16;
3208 	iovs[0].iov_len = 256;
3209 	iovs[1].iov_base = buf + 16 + 256 + 32;
3210 	iovs[1].iov_len = 256;
3211 
3212 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3213 	CU_ASSERT(rc == 0);
3214 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3215 	stub_complete_io(1);
3216 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3217 
3218 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3219 	CU_ASSERT(rc == 0);
3220 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3221 	stub_complete_io(1);
3222 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3223 
3224 	/* Pass unaligned iov with 2048 alignment required */
3225 	alignment = 2048;
3226 	bdev->required_alignment = spdk_u32log2(alignment);
3227 
3228 	iovcnt = 2;
3229 	iovs[0].iov_base = buf + 16;
3230 	iovs[0].iov_len = 256;
3231 	iovs[1].iov_base = buf + 16 + 256 + 32;
3232 	iovs[1].iov_len = 256;
3233 
3234 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3235 	CU_ASSERT(rc == 0);
3236 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == iovcnt);
3237 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3238 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3239 				    alignment));
3240 	stub_complete_io(1);
3241 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3242 
3243 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3244 	CU_ASSERT(rc == 0);
3245 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == iovcnt);
3246 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3247 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3248 				    alignment));
3249 	stub_complete_io(1);
3250 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3251 
3252 	/* Pass iov without an allocated buffer and with no alignment required */
3253 	alignment = 1;
3254 	bdev->required_alignment = spdk_u32log2(alignment);
3255 
3256 	iovcnt = 1;
3257 	iovs[0].iov_base = NULL;
3258 	iovs[0].iov_len = 0;
3259 
3260 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3261 	CU_ASSERT(rc == 0);
3262 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3263 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3264 				    alignment));
3265 	stub_complete_io(1);
3266 
3267 	/* Pass iov without an allocated buffer and with 1024 alignment required */
3268 	alignment = 1024;
3269 	bdev->required_alignment = spdk_u32log2(alignment);
3270 
3271 	iovcnt = 1;
3272 	iovs[0].iov_base = NULL;
3273 	iovs[0].iov_len = 0;
3274 
3275 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3276 	CU_ASSERT(rc == 0);
3277 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3278 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3279 				    alignment));
3280 	stub_complete_io(1);
3281 
3282 	spdk_put_io_channel(io_ch);
3283 	spdk_bdev_close(desc);
3284 	free_bdev(bdev);
3285 	fn_table.submit_request = stub_submit_request;
3286 	ut_fini_bdev();
3287 
3288 	free(buf);
3289 }
3290 
3291 static void
3292 bdev_io_alignment_with_boundary(void)
3293 {
3294 	struct spdk_bdev *bdev;
3295 	struct spdk_bdev_desc *desc = NULL;
3296 	struct spdk_io_channel *io_ch;
3297 	struct spdk_bdev_opts bdev_opts = {};
3298 	int rc;
3299 	void *buf = NULL;
3300 	struct iovec iovs[2];
3301 	int iovcnt;
3302 	uint64_t alignment;
3303 
3304 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
3305 	bdev_opts.bdev_io_pool_size = 20;
3306 	bdev_opts.bdev_io_cache_size = 2;
3307 	bdev_opts.opts_size = sizeof(bdev_opts);
3308 	ut_init_bdev(&bdev_opts);
3309 
3310 	fn_table.submit_request = stub_submit_request_get_buf;
3311 	bdev = allocate_bdev("bdev0");
3312 
3313 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
3314 	CU_ASSERT(rc == 0);
3315 	CU_ASSERT(desc != NULL);
3316 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3317 	io_ch = spdk_bdev_get_io_channel(desc);
3318 	CU_ASSERT(io_ch != NULL);
3319 
3320 	/* Create aligned buffer */
3321 	rc = posix_memalign(&buf, 4096, 131072);
3322 	SPDK_CU_ASSERT_FATAL(rc == 0);
3323 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3324 
3325 	/* 512 * 3 with 2 IO boundary, allocate small data buffer from bdev layer */
3326 	alignment = 512;
3327 	bdev->required_alignment = spdk_u32log2(alignment);
3328 	bdev->optimal_io_boundary = 2;
3329 	bdev->split_on_optimal_io_boundary = true;
3330 
3331 	iovcnt = 1;
3332 	iovs[0].iov_base = NULL;
3333 	iovs[0].iov_len = 512 * 3;
3334 
3335 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3336 	CU_ASSERT(rc == 0);
3337 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3338 	stub_complete_io(2);
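	/* With iov_base == NULL the bdev layer allocates the data buffer itself
	 * (stub_submit_request_get_buf exercises that get_buf path), and the
	 * 3-block read at offset 1 crosses the 2-block boundary, yielding the two
	 * children completed above.
	 */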
3339 
3340 	/* 8KiB with 16 IO boundary, allocate large data buffer from bdev layer */
3341 	alignment = 512;
3342 	bdev->required_alignment = spdk_u32log2(alignment);
3343 	bdev->optimal_io_boundary = 16;
3344 	bdev->split_on_optimal_io_boundary = true;
3345 
3346 	iovcnt = 1;
3347 	iovs[0].iov_base = NULL;
3348 	iovs[0].iov_len = 512 * 16;
3349 
3350 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 16, io_done, NULL);
3351 	CU_ASSERT(rc == 0);
3352 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3353 	stub_complete_io(2);
3354 
3355 	/* 512 * 160 with 128 IO boundary, 63.5KiB + 16.5KiB for the two child requests */
3356 	alignment = 512;
3357 	bdev->required_alignment = spdk_u32log2(alignment);
3358 	bdev->optimal_io_boundary = 128;
3359 	bdev->split_on_optimal_io_boundary = true;
3360 
3361 	iovcnt = 1;
3362 	iovs[0].iov_base = buf + 16;
3363 	iovs[0].iov_len = 512 * 160;
3364 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 160, io_done, NULL);
3365 	CU_ASSERT(rc == 0);
3366 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3367 	stub_complete_io(2);
3368 
3369 	/* 512 * 3 with 2 IO boundary */
3370 	alignment = 512;
3371 	bdev->required_alignment = spdk_u32log2(alignment);
3372 	bdev->optimal_io_boundary = 2;
3373 	bdev->split_on_optimal_io_boundary = true;
3374 
3375 	iovcnt = 2;
3376 	iovs[0].iov_base = buf + 16;
3377 	iovs[0].iov_len = 512;
3378 	iovs[1].iov_base = buf + 16 + 512 + 32;
3379 	iovs[1].iov_len = 1024;
3380 
3381 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3382 	CU_ASSERT(rc == 0);
3383 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3384 	stub_complete_io(2);
3385 
3386 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3387 	CU_ASSERT(rc == 0);
3388 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3389 	stub_complete_io(2);
3390 
3391 	/* 512 * 64 with 32 IO boundary */
3392 	bdev->optimal_io_boundary = 32;
3393 	iovcnt = 2;
3394 	iovs[0].iov_base = buf + 16;
3395 	iovs[0].iov_len = 16384;
3396 	iovs[1].iov_base = buf + 16 + 16384 + 32;
3397 	iovs[1].iov_len = 16384;
3398 
3399 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 64, io_done, NULL);
3400 	CU_ASSERT(rc == 0);
3401 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
3402 	stub_complete_io(3);
3403 
3404 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 64, io_done, NULL);
3405 	CU_ASSERT(rc == 0);
3406 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
3407 	stub_complete_io(3);
3408 
3409 	/* 512 * 160 with 32 IO boundary */
3410 	iovcnt = 1;
3411 	iovs[0].iov_base = buf + 16;
3412 	iovs[0].iov_len = 16384 + 65536;
3413 
3414 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 160, io_done, NULL);
3415 	CU_ASSERT(rc == 0);
3416 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 6);
3417 	stub_complete_io(6);
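	/* Starting at offset 1 makes each request straddle one extra boundary:
	 * the 64-block writes/reads above split into 31 + 32 + 1 blocks
	 * (3 children), and this 160-block write into 31 + 4 * 32 + 1 blocks
	 * (6 children).
	 */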
3418 
3419 	spdk_put_io_channel(io_ch);
3420 	spdk_bdev_close(desc);
3421 	free_bdev(bdev);
3422 	fn_table.submit_request = stub_submit_request;
3423 	ut_fini_bdev();
3424 
3425 	free(buf);
3426 }
3427 
3428 static void
3429 histogram_status_cb(void *cb_arg, int status)
3430 {
3431 	g_status = status;
3432 }
3433 
3434 static void
3435 histogram_data_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
3436 {
3437 	g_status = status;
3438 	g_histogram = histogram;
3439 }
3440 
3441 static void
3442 histogram_io_count(void *ctx, uint64_t start, uint64_t end, uint64_t count,
3443 		   uint64_t total, uint64_t so_far)
3444 {
3445 	g_count += count;
3446 }
3447 
3448 static void
3449 histogram_channel_data_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
3450 {
3451 	spdk_histogram_data_fn cb_fn = cb_arg;
3452 
3453 	g_status = status;
3454 
3455 	if (status == 0) {
3456 		spdk_histogram_data_iterate(histogram, cb_fn, NULL);
3457 	}
3458 }
3459 
3460 static void
3461 bdev_histograms(void)
3462 {
3463 	struct spdk_bdev *bdev;
3464 	struct spdk_bdev_desc *desc = NULL;
3465 	struct spdk_io_channel *ch;
3466 	struct spdk_histogram_data *histogram;
3467 	uint8_t buf[4096];
3468 	int rc;
3469 
3470 	ut_init_bdev(NULL);
3471 
3472 	bdev = allocate_bdev("bdev");
3473 
3474 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3475 	CU_ASSERT(rc == 0);
3476 	CU_ASSERT(desc != NULL);
3477 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3478 
3479 	ch = spdk_bdev_get_io_channel(desc);
3480 	CU_ASSERT(ch != NULL);
3481 
3482 	/* Enable histogram */
3483 	g_status = -1;
3484 	spdk_bdev_histogram_enable(bdev, histogram_status_cb, NULL, true);
3485 	poll_threads();
3486 	CU_ASSERT(g_status == 0);
3487 	CU_ASSERT(bdev->internal.histogram_enabled == true);
3488 
3489 	/* Allocate histogram */
3490 	histogram = spdk_histogram_data_alloc();
3491 	SPDK_CU_ASSERT_FATAL(histogram != NULL);
3492 
3493 	/* Check if histogram is zeroed */
3494 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3495 	poll_threads();
3496 	CU_ASSERT(g_status == 0);
3497 	SPDK_CU_ASSERT_FATAL(g_histogram != NULL);
3498 
3499 	g_count = 0;
3500 	spdk_histogram_data_iterate(g_histogram, histogram_io_count, NULL);
3501 
3502 	CU_ASSERT(g_count == 0);
3503 
3504 	rc = spdk_bdev_write_blocks(desc, ch, buf, 0, 1, io_done, NULL);
3505 	CU_ASSERT(rc == 0);
3506 
3507 	spdk_delay_us(10);
3508 	stub_complete_io(1);
3509 	poll_threads();
3510 
3511 	rc = spdk_bdev_read_blocks(desc, ch, buf, 0, 1, io_done, NULL);
3512 	CU_ASSERT(rc == 0);
3513 
3514 	spdk_delay_us(10);
3515 	stub_complete_io(1);
3516 	poll_threads();
3517 
3518 	/* Check if histogram gathered data from all I/O channels */
3519 	g_histogram = NULL;
3520 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3521 	poll_threads();
3522 	CU_ASSERT(g_status == 0);
3523 	CU_ASSERT(bdev->internal.histogram_enabled == true);
3524 	SPDK_CU_ASSERT_FATAL(g_histogram != NULL);
3525 
3526 	g_count = 0;
3527 	spdk_histogram_data_iterate(g_histogram, histogram_io_count, NULL);
3528 	CU_ASSERT(g_count == 2);
3529 
3530 	g_count = 0;
3531 	spdk_bdev_channel_get_histogram(ch, histogram_channel_data_cb, histogram_io_count);
3532 	CU_ASSERT(g_status == 0);
3533 	CU_ASSERT(g_count == 2);
3534 
3535 	/* Disable histogram */
3536 	spdk_bdev_histogram_enable(bdev, histogram_status_cb, NULL, false);
3537 	poll_threads();
3538 	CU_ASSERT(g_status == 0);
3539 	CU_ASSERT(bdev->internal.histogram_enabled == false);
3540 
3541 	/* Try to run histogram commands on disabled bdev */
3542 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3543 	poll_threads();
3544 	CU_ASSERT(g_status == -EFAULT);
3545 
3546 	spdk_bdev_channel_get_histogram(ch, histogram_channel_data_cb, NULL);
3547 	CU_ASSERT(g_status == -EFAULT);
3548 
3549 	spdk_histogram_data_free(histogram);
3550 	spdk_put_io_channel(ch);
3551 	spdk_bdev_close(desc);
3552 	free_bdev(bdev);
3553 	ut_fini_bdev();
3554 }
3555 
3556 static void
3557 _bdev_compare(bool emulated)
3558 {
3559 	struct spdk_bdev *bdev;
3560 	struct spdk_bdev_desc *desc = NULL;
3561 	struct spdk_io_channel *ioch;
3562 	struct ut_expected_io *expected_io;
3563 	uint64_t offset, num_blocks;
3564 	uint32_t num_completed;
3565 	char aa_buf[512];
3566 	char bb_buf[512];
3567 	struct iovec compare_iov;
3568 	uint8_t expected_io_type;
3569 	int rc;
3570 
3571 	if (emulated) {
3572 		expected_io_type = SPDK_BDEV_IO_TYPE_READ;
3573 	} else {
3574 		expected_io_type = SPDK_BDEV_IO_TYPE_COMPARE;
3575 	}
3576 
3577 	memset(aa_buf, 0xaa, sizeof(aa_buf));
3578 	memset(bb_buf, 0xbb, sizeof(bb_buf));
3579 
3580 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = !emulated;
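	/* When the backing bdev lacks native COMPARE support (emulated == true),
	 * the bdev layer emulates the operation by reading the blocks and
	 * comparing them against the caller's buffer, hence READ is the expected
	 * child I/O type.
	 */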
3581 
3582 	ut_init_bdev(NULL);
3583 	fn_table.submit_request = stub_submit_request_get_buf;
3584 	bdev = allocate_bdev("bdev");
3585 
3586 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3587 	CU_ASSERT_EQUAL(rc, 0);
3588 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3589 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3590 	ioch = spdk_bdev_get_io_channel(desc);
3591 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3592 
3593 	fn_table.submit_request = stub_submit_request_get_buf;
3594 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3595 
3596 	offset = 50;
3597 	num_blocks = 1;
3598 	compare_iov.iov_base = aa_buf;
3599 	compare_iov.iov_len = sizeof(aa_buf);
3600 
3601 	/* 1. successful compare */
3602 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3603 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3604 
3605 	g_io_done = false;
3606 	g_compare_read_buf = aa_buf;
3607 	g_compare_read_buf_len = sizeof(aa_buf);
3608 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3609 	CU_ASSERT_EQUAL(rc, 0);
3610 	num_completed = stub_complete_io(1);
3611 	CU_ASSERT_EQUAL(num_completed, 1);
3612 	CU_ASSERT(g_io_done == true);
3613 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3614 
3615 	/* 2. miscompare */
3616 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3617 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3618 
3619 	g_io_done = false;
3620 	g_compare_read_buf = bb_buf;
3621 	g_compare_read_buf_len = sizeof(bb_buf);
3622 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3623 	CU_ASSERT_EQUAL(rc, 0);
3624 	num_completed = stub_complete_io(1);
3625 	CU_ASSERT_EQUAL(num_completed, 1);
3626 	CU_ASSERT(g_io_done == true);
3627 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3628 
3629 	spdk_put_io_channel(ioch);
3630 	spdk_bdev_close(desc);
3631 	free_bdev(bdev);
3632 	fn_table.submit_request = stub_submit_request;
3633 	ut_fini_bdev();
3634 
3635 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3636 
3637 	g_compare_read_buf = NULL;
3638 }
3639 
3640 static void
3641 _bdev_compare_with_md(bool emulated)
3642 {
3643 	struct spdk_bdev *bdev;
3644 	struct spdk_bdev_desc *desc = NULL;
3645 	struct spdk_io_channel *ioch;
3646 	struct ut_expected_io *expected_io;
3647 	uint64_t offset, num_blocks;
3648 	uint32_t num_completed;
3649 	char buf[1024 + 16 /* 2 * blocklen + 2 * mdlen */];
3650 	char buf_interleaved_miscompare[1024 + 16 /* 2 * blocklen + 2 * mdlen */];
3651 	char buf_miscompare[1024 /* 2 * blocklen */];
3652 	char md_buf[16];
3653 	char md_buf_miscompare[16];
3654 	struct iovec compare_iov;
3655 	uint8_t expected_io_type;
3656 	int rc;
3657 
3658 	if (emulated) {
3659 		expected_io_type = SPDK_BDEV_IO_TYPE_READ;
3660 	} else {
3661 		expected_io_type = SPDK_BDEV_IO_TYPE_COMPARE;
3662 	}
3663 
3664 	memset(buf, 0xaa, sizeof(buf));
3665 	memset(buf_interleaved_miscompare, 0xaa, sizeof(buf_interleaved_miscompare));
3666 	/* make last md different */
3667 	memset(buf_interleaved_miscompare + 1024 + 8, 0xbb, 8);
3668 	memset(buf_miscompare, 0xbb, sizeof(buf_miscompare));
3669 	memset(md_buf, 0xaa, 16);
3670 	memset(md_buf_miscompare, 0xbb, 16);
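	/* With interleaving, each block is laid out as 512 data bytes followed by
	 * 8 md bytes, so the second block's metadata occupies bytes
	 * [1024 + 8, 1024 + 16); flipping only that range (above) forces a
	 * miscompare on the metadata alone.
	 */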
3671 
3672 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = !emulated;
3673 
3674 	ut_init_bdev(NULL);
3675 	fn_table.submit_request = stub_submit_request_get_buf;
3676 	bdev = allocate_bdev("bdev");
3677 
3678 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3679 	CU_ASSERT_EQUAL(rc, 0);
3680 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3681 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3682 	ioch = spdk_bdev_get_io_channel(desc);
3683 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3684 
3685 	fn_table.submit_request = stub_submit_request_get_buf;
3686 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3687 
3688 	offset = 50;
3689 	num_blocks = 2;
3690 
3691 	/* interleaved md & data */
3692 	bdev->md_interleave = true;
3693 	bdev->md_len = 8;
3694 	bdev->blocklen = 512 + 8;
3695 	compare_iov.iov_base = buf;
3696 	compare_iov.iov_len = sizeof(buf);
3697 
3698 	/* 1. successful compare with md interleaved */
3699 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3700 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3701 
3702 	g_io_done = false;
3703 	g_compare_read_buf = buf;
3704 	g_compare_read_buf_len = sizeof(buf);
3705 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3706 	CU_ASSERT_EQUAL(rc, 0);
3707 	num_completed = stub_complete_io(1);
3708 	CU_ASSERT_EQUAL(num_completed, 1);
3709 	CU_ASSERT(g_io_done == true);
3710 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3711 
3712 	/* 2. miscompare with md interleaved */
3713 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3714 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3715 
3716 	g_io_done = false;
3717 	g_compare_read_buf = buf_interleaved_miscompare;
3718 	g_compare_read_buf_len = sizeof(buf_interleaved_miscompare);
3719 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3720 	CU_ASSERT_EQUAL(rc, 0);
3721 	num_completed = stub_complete_io(1);
3722 	CU_ASSERT_EQUAL(num_completed, 1);
3723 	CU_ASSERT(g_io_done == true);
3724 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3725 
3726 	/* Separate data & md buffers */
3727 	bdev->md_interleave = false;
3728 	bdev->blocklen = 512;
3729 	compare_iov.iov_base = buf;
3730 	compare_iov.iov_len = 1024;
3731 
3732 	/* 3. successful compare with md separated */
3733 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3734 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3735 
3736 	g_io_done = false;
3737 	g_compare_read_buf = buf;
3738 	g_compare_read_buf_len = 1024;
3739 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3740 	g_compare_md_buf = md_buf;
3741 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3742 					       offset, num_blocks, io_done, NULL);
3743 	CU_ASSERT_EQUAL(rc, 0);
3744 	num_completed = stub_complete_io(1);
3745 	CU_ASSERT_EQUAL(num_completed, 1);
3746 	CU_ASSERT(g_io_done == true);
3747 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3748 
3749 	/* 4. miscompare with md separated where md buf is different */
3750 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3751 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3752 
3753 	g_io_done = false;
3754 	g_compare_read_buf = buf;
3755 	g_compare_read_buf_len = 1024;
3756 	g_compare_md_buf = md_buf_miscompare;
3757 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3758 					       offset, num_blocks, io_done, NULL);
3759 	CU_ASSERT_EQUAL(rc, 0);
3760 	num_completed = stub_complete_io(1);
3761 	CU_ASSERT_EQUAL(num_completed, 1);
3762 	CU_ASSERT(g_io_done == true);
3763 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3764 
3765 	/* 5. miscompare with md separated where buf is different */
3766 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3767 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3768 
3769 	g_io_done = false;
3770 	g_compare_read_buf = buf_miscompare;
3771 	g_compare_read_buf_len = sizeof(buf_miscompare);
3772 	g_compare_md_buf = md_buf;
3773 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3774 					       offset, num_blocks, io_done, NULL);
3775 	CU_ASSERT_EQUAL(rc, 0);
3776 	num_completed = stub_complete_io(1);
3777 	CU_ASSERT_EQUAL(num_completed, 1);
3778 	CU_ASSERT(g_io_done == true);
3779 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3780 
3781 	bdev->md_len = 0;
3782 	g_compare_md_buf = NULL;
3783 
3784 	spdk_put_io_channel(ioch);
3785 	spdk_bdev_close(desc);
3786 	free_bdev(bdev);
3787 	fn_table.submit_request = stub_submit_request;
3788 	ut_fini_bdev();
3789 
3790 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3791 
3792 	g_compare_read_buf = NULL;
3793 }
3794 
3795 static void
3796 bdev_compare(void)
3797 {
3798 	_bdev_compare(false);
3799 	_bdev_compare_with_md(false);
3800 }
3801 
3802 static void
3803 bdev_compare_emulated(void)
3804 {
3805 	_bdev_compare(true);
3806 	_bdev_compare_with_md(true);
3807 }
3808 
3809 static void
3810 bdev_compare_and_write(void)
3811 {
3812 	struct spdk_bdev *bdev;
3813 	struct spdk_bdev_desc *desc = NULL;
3814 	struct spdk_io_channel *ioch;
3815 	struct ut_expected_io *expected_io;
3816 	uint64_t offset, num_blocks;
3817 	uint32_t num_completed;
3818 	char aa_buf[512];
3819 	char bb_buf[512];
3820 	char cc_buf[512];
3821 	char write_buf[512];
3822 	struct iovec compare_iov;
3823 	struct iovec write_iov;
3824 	int rc;
3825 
3826 	memset(aa_buf, 0xaa, sizeof(aa_buf));
3827 	memset(bb_buf, 0xbb, sizeof(bb_buf));
3828 	memset(cc_buf, 0xcc, sizeof(cc_buf));
3829 
3830 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = false;
3831 
3832 	ut_init_bdev(NULL);
3833 	fn_table.submit_request = stub_submit_request_get_buf;
3834 	bdev = allocate_bdev("bdev");
3835 
3836 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3837 	CU_ASSERT_EQUAL(rc, 0);
3838 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3839 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3840 	ioch = spdk_bdev_get_io_channel(desc);
3841 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3842 
3843 	fn_table.submit_request = stub_submit_request_get_buf;
3844 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3845 
3846 	offset = 50;
3847 	num_blocks = 1;
3848 	compare_iov.iov_base = aa_buf;
3849 	compare_iov.iov_len = sizeof(aa_buf);
3850 	write_iov.iov_base = bb_buf;
3851 	write_iov.iov_len = sizeof(bb_buf);
3852 
3853 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
3854 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3855 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, offset, num_blocks, 0);
3856 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
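	/* Without native COMPARE support, compare-and-write is emulated as a
	 * locked two-phase sequence: lock the range, READ and compare, WRITE the
	 * new data, then unlock.  The parent therefore completes only after both
	 * child I/Os do.
	 */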
3857 
3858 	g_io_done = false;
3859 	g_compare_read_buf = aa_buf;
3860 	g_compare_read_buf_len = sizeof(aa_buf);
3861 	memset(write_buf, 0, sizeof(write_buf));
3862 	g_compare_write_buf = write_buf;
3863 	g_compare_write_buf_len = sizeof(write_buf);
3864 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
3865 			offset, num_blocks, io_done, NULL);
3866 	/* Trigger range locking */
3867 	poll_threads();
3868 	CU_ASSERT_EQUAL(rc, 0);
3869 	num_completed = stub_complete_io(1);
3870 	CU_ASSERT_EQUAL(num_completed, 1);
3871 	CU_ASSERT(g_io_done == false);
3872 	num_completed = stub_complete_io(1);
3873 	/* Trigger range unlocking */
3874 	poll_threads();
3875 	CU_ASSERT_EQUAL(num_completed, 1);
3876 	CU_ASSERT(g_io_done == true);
3877 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3878 	CU_ASSERT(memcmp(write_buf, bb_buf, sizeof(write_buf)) == 0);
3879 
3880 	/* Test miscompare */
3881 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
3882 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3883 
3884 	g_io_done = false;
3885 	g_compare_read_buf = cc_buf;
3886 	g_compare_read_buf_len = sizeof(cc_buf);
3887 	memset(write_buf, 0, sizeof(write_buf));
3888 	g_compare_write_buf = write_buf;
3889 	g_compare_write_buf_len = sizeof(write_buf);
3890 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
3891 			offset, num_blocks, io_done, NULL);
3892 	/* Trigger range locking */
3893 	poll_threads();
3894 	CU_ASSERT_EQUAL(rc, 0);
3895 	num_completed = stub_complete_io(1);
3896 	/* Trigger range unlocking earlier because we expect an error here */
3897 	poll_threads();
3898 	CU_ASSERT_EQUAL(num_completed, 1);
3899 	CU_ASSERT(g_io_done == true);
3900 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3901 	num_completed = stub_complete_io(1);
3902 	CU_ASSERT_EQUAL(num_completed, 0);
3903 
3904 	spdk_put_io_channel(ioch);
3905 	spdk_bdev_close(desc);
3906 	free_bdev(bdev);
3907 	fn_table.submit_request = stub_submit_request;
3908 	ut_fini_bdev();
3909 
3910 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3911 
3912 	g_compare_read_buf = NULL;
3913 	g_compare_write_buf = NULL;
3914 }
3915 
3916 static void
3917 bdev_write_zeroes(void)
3918 {
3919 	struct spdk_bdev *bdev;
3920 	struct spdk_bdev_desc *desc = NULL;
3921 	struct spdk_io_channel *ioch;
3922 	struct ut_expected_io *expected_io;
3923 	uint64_t offset, num_io_blocks, num_blocks;
3924 	uint32_t num_completed, num_requests;
3925 	int rc;
3926 
3927 	ut_init_bdev(NULL);
3928 	bdev = allocate_bdev("bdev");
3929 
3930 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3931 	CU_ASSERT_EQUAL(rc, 0);
3932 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3933 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3934 	ioch = spdk_bdev_get_io_channel(desc);
3935 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3936 
3937 	fn_table.submit_request = stub_submit_request;
3938 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3939 
3940 	/* First test that if the bdev supports write_zeroes, the request won't be split */
3941 	bdev->md_len = 0;
3942 	bdev->blocklen = 4096;
3943 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * 2;
3944 
3945 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0);
3946 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3947 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3948 	CU_ASSERT_EQUAL(rc, 0);
3949 	num_completed = stub_complete_io(1);
3950 	CU_ASSERT_EQUAL(num_completed, 1);
3951 
3952 	/* Check that if write zeroes is not supported, it'll be replaced by regular writes */
3953 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
3954 	num_io_blocks = ZERO_BUFFER_SIZE / bdev->blocklen;
3955 	num_requests = 2;
3956 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * num_requests;
3957 
3958 	for (offset = 0; offset < num_requests; ++offset) {
3959 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
3960 						   offset * num_io_blocks, num_io_blocks, 0);
3961 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3962 	}
3963 
3964 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3965 	CU_ASSERT_EQUAL(rc, 0);
3966 	num_completed = stub_complete_io(num_requests);
3967 	CU_ASSERT_EQUAL(num_completed, num_requests);
3968 
3969 	/* Check that the splitting is correct if bdev has interleaved metadata */
3970 	bdev->md_interleave = true;
3971 	bdev->md_len = 64;
3972 	bdev->blocklen = 4096 + 64;
3973 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * 2;
3974 
3975 	num_requests = offset = 0;
3976 	while (offset < num_blocks) {
3977 		num_io_blocks = spdk_min(ZERO_BUFFER_SIZE / bdev->blocklen, num_blocks - offset);
3978 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
3979 						   offset, num_io_blocks, 0);
3980 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3981 		offset += num_io_blocks;
3982 		num_requests++;
3983 	}
3984 
3985 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3986 	CU_ASSERT_EQUAL(rc, 0);
3987 	num_completed = stub_complete_io(num_requests);
3988 	CU_ASSERT_EQUAL(num_completed, num_requests);
3989 	num_completed = stub_complete_io(num_requests);
3990 	CU_ASSERT_EQUAL(num_completed, 0);
3991 
3992 	/* Check the same for a separate metadata buffer */
3993 	bdev->md_interleave = false;
3994 	bdev->md_len = 64;
3995 	bdev->blocklen = 4096;
3996 
3997 	num_requests = offset = 0;
3998 	while (offset < num_blocks) {
3999 		num_io_blocks = spdk_min(ZERO_BUFFER_SIZE / (bdev->blocklen + bdev->md_len), num_blocks - offset);
4000 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
4001 						   offset, num_io_blocks, 0);
4002 		expected_io->md_buf = (char *)g_bdev_mgr.zero_buffer + num_io_blocks * bdev->blocklen;
4003 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4004 		offset += num_io_blocks;
4005 		num_requests++;
4006 	}
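	/* With separate metadata the zero buffer is shared between data and md:
	 * the first num_io_blocks * blocklen bytes carry data and the md pointer
	 * starts immediately after them, which is what expected_io->md_buf above
	 * encodes.
	 */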
4007 
4008 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
4009 	CU_ASSERT_EQUAL(rc, 0);
4010 	num_completed = stub_complete_io(num_requests);
4011 	CU_ASSERT_EQUAL(num_completed, num_requests);
4012 
4013 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);
4014 	spdk_put_io_channel(ioch);
4015 	spdk_bdev_close(desc);
4016 	free_bdev(bdev);
4017 	ut_fini_bdev();
4018 }
4019 
4020 static void
4021 bdev_zcopy_write(void)
4022 {
4023 	struct spdk_bdev *bdev;
4024 	struct spdk_bdev_desc *desc = NULL;
4025 	struct spdk_io_channel *ioch;
4026 	struct ut_expected_io *expected_io;
4027 	uint64_t offset, num_blocks;
4028 	uint32_t num_completed;
4029 	char aa_buf[512];
4030 	struct iovec iov;
4031 	int rc;
4032 	const bool populate = false;
4033 	const bool commit = true;
4034 
4035 	memset(aa_buf, 0xaa, sizeof(aa_buf));
4036 
4037 	ut_init_bdev(NULL);
4038 	bdev = allocate_bdev("bdev");
4039 
4040 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
4041 	CU_ASSERT_EQUAL(rc, 0);
4042 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4043 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4044 	ioch = spdk_bdev_get_io_channel(desc);
4045 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
4046 
4047 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
4048 
4049 	offset = 50;
4050 	num_blocks = 1;
4051 	iov.iov_base = NULL;
4052 	iov.iov_len = 0;
4053 
4054 	g_zcopy_read_buf = (void *) 0x1122334455667788UL;
4055 	g_zcopy_read_buf_len = (uint32_t) -1;
4056 	/* Do a zcopy start for a write (populate=false) */
4057 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4058 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4059 	g_io_done = false;
4060 	g_zcopy_write_buf = aa_buf;
4061 	g_zcopy_write_buf_len = sizeof(aa_buf);
4062 	g_zcopy_bdev_io = NULL;
4063 	rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL);
4064 	CU_ASSERT_EQUAL(rc, 0);
4065 	num_completed = stub_complete_io(1);
4066 	CU_ASSERT_EQUAL(num_completed, 1);
4067 	CU_ASSERT(g_io_done == true);
4068 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4069 	/* Check that the iov has been set up */
4070 	CU_ASSERT(iov.iov_base == g_zcopy_write_buf);
4071 	CU_ASSERT(iov.iov_len == g_zcopy_write_buf_len);
4072 	/* Check that the bdev_io has been saved */
4073 	CU_ASSERT(g_zcopy_bdev_io != NULL);
4074 	/* Now do the zcopy end for a write (commit=true) */
4075 	g_io_done = false;
4076 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4077 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4078 	rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL);
4079 	CU_ASSERT_EQUAL(rc, 0);
4080 	num_completed = stub_complete_io(1);
4081 	CU_ASSERT_EQUAL(num_completed, 1);
4082 	CU_ASSERT(g_io_done == true);
4083 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4084 	/* Check that the g_zcopy fields are reset by io_done */
4085 	CU_ASSERT(g_zcopy_write_buf == NULL);
4086 	CU_ASSERT(g_zcopy_write_buf_len == 0);
4087 	/* Check that io_done has freed the g_zcopy_bdev_io */
4088 	CU_ASSERT(g_zcopy_bdev_io == NULL);
4089 
4090 	/* Check that the zcopy read buffer has not been touched, which
4091 	 * ensures that the correct buffers were used.
4092 	 */
4093 	CU_ASSERT(g_zcopy_read_buf == (void *) 0x1122334455667788UL);
4094 	CU_ASSERT(g_zcopy_read_buf_len == (uint32_t) -1);
4095 
4096 	spdk_put_io_channel(ioch);
4097 	spdk_bdev_close(desc);
4098 	free_bdev(bdev);
4099 	ut_fini_bdev();
4100 }
4101 
4102 static void
4103 bdev_zcopy_read(void)
4104 {
4105 	struct spdk_bdev *bdev;
4106 	struct spdk_bdev_desc *desc = NULL;
4107 	struct spdk_io_channel *ioch;
4108 	struct ut_expected_io *expected_io;
4109 	uint64_t offset, num_blocks;
4110 	uint32_t num_completed;
4111 	char aa_buf[512];
4112 	struct iovec iov;
4113 	int rc;
4114 	const bool populate = true;
4115 	const bool commit = false;
4116 
4117 	memset(aa_buf, 0xaa, sizeof(aa_buf));
4118 
4119 	ut_init_bdev(NULL);
4120 	bdev = allocate_bdev("bdev");
4121 
4122 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
4123 	CU_ASSERT_EQUAL(rc, 0);
4124 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4125 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4126 	ioch = spdk_bdev_get_io_channel(desc);
4127 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
4128 
4129 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
4130 
4131 	offset = 50;
4132 	num_blocks = 1;
4133 	iov.iov_base = NULL;
4134 	iov.iov_len = 0;
4135 
4136 	g_zcopy_write_buf = (void *) 0x1122334455667788UL;
4137 	g_zcopy_write_buf_len = (uint32_t) -1;
4138 
4139 	/* Do a zcopy start for a read (populate=true) */
4140 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4141 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4142 	g_io_done = false;
4143 	g_zcopy_read_buf = aa_buf;
4144 	g_zcopy_read_buf_len = sizeof(aa_buf);
4145 	g_zcopy_bdev_io = NULL;
4146 	rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL);
4147 	CU_ASSERT_EQUAL(rc, 0);
4148 	num_completed = stub_complete_io(1);
4149 	CU_ASSERT_EQUAL(num_completed, 1);
4150 	CU_ASSERT(g_io_done == true);
4151 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4152 	/* Check that the iov has been set up */
4153 	CU_ASSERT(iov.iov_base == g_zcopy_read_buf);
4154 	CU_ASSERT(iov.iov_len == g_zcopy_read_buf_len);
4155 	/* Check that the bdev_io has been saved */
4156 	CU_ASSERT(g_zcopy_bdev_io != NULL);
4157 
4158 	/* Now do the zcopy end for a read (commit=false) */
4159 	g_io_done = false;
4160 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4161 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4162 	rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL);
4163 	CU_ASSERT_EQUAL(rc, 0);
4164 	num_completed = stub_complete_io(1);
4165 	CU_ASSERT_EQUAL(num_completed, 1);
4166 	CU_ASSERT(g_io_done == true);
4167 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4168 	/* Check that the g_zcopy fields are reset by io_done */
4169 	CU_ASSERT(g_zcopy_read_buf == NULL);
4170 	CU_ASSERT(g_zcopy_read_buf_len == 0);
4171 	/* Check that io_done has freed the g_zcopy_bdev_io */
4172 	CU_ASSERT(g_zcopy_bdev_io == NULL);
4173 
4174 	/* Check that the zcopy write buffer has not been touched, which
4175 	 * ensures that the correct buffers were used.
4176 	 */
4177 	CU_ASSERT(g_zcopy_write_buf == (void *) 0x1122334455667788UL);
4178 	CU_ASSERT(g_zcopy_write_buf_len == (uint32_t) -1);
4179 
4180 	spdk_put_io_channel(ioch);
4181 	spdk_bdev_close(desc);
4182 	free_bdev(bdev);
4183 	ut_fini_bdev();
4184 }
4185 
4186 static void
4187 bdev_open_while_hotremove(void)
4188 {
4189 	struct spdk_bdev *bdev;
4190 	struct spdk_bdev_desc *desc[2] = {};
4191 	int rc;
4192 
4193 	bdev = allocate_bdev("bdev");
4194 
4195 	rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[0]);
4196 	CU_ASSERT(rc == 0);
4197 	SPDK_CU_ASSERT_FATAL(desc[0] != NULL);
4198 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc[0]));
4199 
4200 	spdk_bdev_unregister(bdev, NULL, NULL);
4201 	/* Bdev unregister is handled asynchronously. Poll threads to let it complete. */
4202 	poll_threads();
4203 
4204 	rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[1]);
4205 	CU_ASSERT(rc == -ENODEV);
4206 	SPDK_CU_ASSERT_FATAL(desc[1] == NULL);
4207 
4208 	spdk_bdev_close(desc[0]);
4209 	free_bdev(bdev);
4210 }
4211 
4212 static void
4213 bdev_close_while_hotremove(void)
4214 {
4215 	struct spdk_bdev *bdev;
4216 	struct spdk_bdev_desc *desc = NULL;
4217 	int rc = 0;
4218 
4219 	bdev = allocate_bdev("bdev");
4220 
4221 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc, &desc);
4222 	CU_ASSERT_EQUAL(rc, 0);
4223 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4224 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4225 
4226 	/* Simulate hot-unplug by unregistering bdev */
4227 	g_event_type1 = 0xFF;
4228 	g_unregister_arg = NULL;
4229 	g_unregister_rc = -1;
4230 	spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678);
4231 	/* Close device while remove event is in flight */
4232 	spdk_bdev_close(desc);
4233 
4234 	/* Ensure that unregister callback is delayed */
4235 	CU_ASSERT_EQUAL(g_unregister_arg, NULL);
4236 	CU_ASSERT_EQUAL(g_unregister_rc, -1);
4237 
4238 	poll_threads();
4239 
4240 	/* Event callback shall not be issued because device was closed */
4241 	CU_ASSERT_EQUAL(g_event_type1, 0xFF);
4242 	/* Unregister callback is issued */
4243 	CU_ASSERT_EQUAL(g_unregister_arg, (void *)0x12345678);
4244 	CU_ASSERT_EQUAL(g_unregister_rc, 0);
4245 
4246 	free_bdev(bdev);
4247 }
4248 
4249 static void
4250 bdev_open_ext(void)
4251 {
4252 	struct spdk_bdev *bdev;
4253 	struct spdk_bdev_desc *desc1 = NULL;
4254 	struct spdk_bdev_desc *desc2 = NULL;
4255 	int rc = 0;
4256 
4257 	bdev = allocate_bdev("bdev");
4258 
4259 	rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
4260 	CU_ASSERT_EQUAL(rc, -EINVAL);
4261 
4262 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
4263 	CU_ASSERT_EQUAL(rc, 0);
4264 
4265 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
4266 	CU_ASSERT_EQUAL(rc, 0);
4267 
4268 	g_event_type1 = 0xFF;
4269 	g_event_type2 = 0xFF;
4270 
4271 	/* Simulate hot-unplug by unregistering bdev */
4272 	spdk_bdev_unregister(bdev, NULL, NULL);
4273 	poll_threads();
4274 
4275 	/* Check if correct events have been triggered in event callback fn */
4276 	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
4277 	CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
4278 
4279 	free_bdev(bdev);
4280 	poll_threads();
4281 }
4282 
4283 static void
4284 bdev_open_ext_unregister(void)
4285 {
4286 	struct spdk_bdev *bdev;
4287 	struct spdk_bdev_desc *desc1 = NULL;
4288 	struct spdk_bdev_desc *desc2 = NULL;
4289 	struct spdk_bdev_desc *desc3 = NULL;
4290 	struct spdk_bdev_desc *desc4 = NULL;
4291 	int rc = 0;
4292 
4293 	bdev = allocate_bdev("bdev");
4294 
4295 	rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
4296 	CU_ASSERT_EQUAL(rc, -EINVAL);
4297 
4298 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
4299 	CU_ASSERT_EQUAL(rc, 0);
4300 
4301 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
4302 	CU_ASSERT_EQUAL(rc, 0);
4303 
4304 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb3, &desc3, &desc3);
4305 	CU_ASSERT_EQUAL(rc, 0);
4306 
4307 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb4, &desc4, &desc4);
4308 	CU_ASSERT_EQUAL(rc, 0);
4309 
4310 	g_event_type1 = 0xFF;
4311 	g_event_type2 = 0xFF;
4312 	g_event_type3 = 0xFF;
4313 	g_event_type4 = 0xFF;
4314 
4315 	g_unregister_arg = NULL;
4316 	g_unregister_rc = -1;
4317 
4318 	/* Simulate hot-unplug by unregistering bdev */
4319 	spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678);
4320 
4321 	/*
4322 	 * Unregister is handled asynchronously and the event callbacks
4323 	 * (i.e., the bdev_open_cbN functions above) will be called.
4324 	 * bdev_open_cb3 and bdev_open_cb4 intentionally do not close
4325 	 * desc3 and desc4, so the bdev is not closed yet.
4326 	 */
4327 	poll_threads();
4328 
4329 	/* Check if correct events have been triggered in event callback fn */
4330 	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
4331 	CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
4332 	CU_ASSERT_EQUAL(g_event_type3, SPDK_BDEV_EVENT_REMOVE);
4333 	CU_ASSERT_EQUAL(g_event_type4, SPDK_BDEV_EVENT_REMOVE);
4334 
4335 	/* Check that unregister callback is delayed */
4336 	CU_ASSERT(g_unregister_arg == NULL);
4337 	CU_ASSERT(g_unregister_rc == -1);
4338 
4339 	/*
4340 	 * Explicitly close desc3. As desc4 is still open, the
4341 	 * unregister callback is still delayed.
4342 	 */
4343 	spdk_bdev_close(desc3);
4344 	CU_ASSERT(g_unregister_arg == NULL);
4345 	CU_ASSERT(g_unregister_rc == -1);
4346 
4347 	/*
4348 	 * Explicitly close desc4 to trigger the ongoing bdev unregister
4349 	 * operation, which completes once the last desc is closed.
4350 	 */
4351 	spdk_bdev_close(desc4);
4352 
4353 	/* Poll the thread for the async unregister operation */
4354 	poll_threads();
4355 
4356 	/* Check that unregister callback is executed */
4357 	CU_ASSERT(g_unregister_arg == (void *)0x12345678);
4358 	CU_ASSERT(g_unregister_rc == 0);
4359 
4360 	free_bdev(bdev);
4361 	poll_threads();
4362 }
4363 
4364 struct timeout_io_cb_arg {
4365 	struct iovec iov;
4366 	uint8_t type;
4367 };
4368 
4369 static int
4370 bdev_channel_count_submitted_io(struct spdk_bdev_channel *ch)
4371 {
4372 	struct spdk_bdev_io *bdev_io;
4373 	int n = 0;
4374 
4375 	if (!ch) {
4376 		return -1;
4377 	}
4378 
4379 	TAILQ_FOREACH(bdev_io, &ch->io_submitted, internal.ch_link) {
4380 		n++;
4381 	}
4382 
4383 	return n;
4384 }
4385 
4386 static void
4387 bdev_channel_io_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
4388 {
4389 	struct timeout_io_cb_arg *ctx = cb_arg;
4390 
4391 	ctx->type = bdev_io->type;
4392 	ctx->iov.iov_base = bdev_io->iov.iov_base;
4393 	ctx->iov.iov_len = bdev_io->iov.iov_len;
4394 }
4395 
4396 static void
4397 bdev_set_io_timeout(void)
4398 {
4399 	struct spdk_bdev *bdev;
4400 	struct spdk_bdev_desc *desc = NULL;
4401 	struct spdk_io_channel *io_ch = NULL;
4402 	struct spdk_bdev_channel *bdev_ch = NULL;
4403 	struct timeout_io_cb_arg cb_arg;
4404 
4405 	ut_init_bdev(NULL);
4406 	bdev = allocate_bdev("bdev");
4407 
4408 	CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
4409 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4410 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4411 
4412 	io_ch = spdk_bdev_get_io_channel(desc);
4413 	CU_ASSERT(io_ch != NULL);
4414 
4415 	bdev_ch = spdk_io_channel_get_ctx(io_ch);
4416 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
4417 
4418 	/* This is part 1.
4419 	 * We will check the bdev_ch->io_submitted list
4420 	 * to make sure that it links the user-submitted I/Os.
4421 	 */
4422 	CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
4423 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4424 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4425 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4426 	stub_complete_io(1);
4427 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4428 	stub_complete_io(1);
4429 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4430 
4431 	/* Split IO */
4432 	bdev->optimal_io_boundary = 16;
4433 	bdev->split_on_optimal_io_boundary = true;
4434 
4435 	/* Now test that a single-vector command is split correctly.
4436 	 * Offset 14, length 8, payload 0xF000
4437 	 *  Child - Offset 14, length 2, payload 0xF000
4438 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
4439 	 *
4440 	 * Unlike the split tests above, no expected values are registered here.
4441 	 */
4442 	CU_ASSERT(spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL) == 0);
4443 	/* We count all submitted I/Os, including those generated by splitting. */
4444 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 3);
4445 	stub_complete_io(1);
4446 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4447 	stub_complete_io(1);
4448 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4449 
4450 	/* Also include the reset IO */
4451 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4452 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4453 	poll_threads();
4454 	stub_complete_io(1);
4455 	poll_threads();
4456 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4457 
4458 	/* This is part 2.
4459 	 * Test registration of the desc's timeout poller.
4460 	 */
4461 
4462 	/* Successfully set the timeout */
4463 	CU_ASSERT(spdk_bdev_set_timeout(desc, 30, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4464 	CU_ASSERT(desc->io_timeout_poller != NULL);
4465 	CU_ASSERT(desc->timeout_in_sec == 30);
4466 	CU_ASSERT(desc->cb_fn == bdev_channel_io_timeout_cb);
4467 	CU_ASSERT(desc->cb_arg == &cb_arg);
4468 
4469 	/* Change the timeout limit */
4470 	CU_ASSERT(spdk_bdev_set_timeout(desc, 20, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4471 	CU_ASSERT(desc->io_timeout_poller != NULL);
4472 	CU_ASSERT(desc->timeout_in_sec == 20);
4473 	CU_ASSERT(desc->cb_fn == bdev_channel_io_timeout_cb);
4474 	CU_ASSERT(desc->cb_arg == &cb_arg);
4475 
4476 	/* Disable the timeout */
4477 	CU_ASSERT(spdk_bdev_set_timeout(desc, 0, NULL, NULL) == 0);
4478 	CU_ASSERT(desc->io_timeout_poller == NULL);
4479 
4480 	/* This is part 3.
4481 	 * We will test catching a timed-out I/O and check whether it is
4482 	 * the submitted one.
4483 	 */
4484 	memset(&cb_arg, 0, sizeof(cb_arg));
4485 	CU_ASSERT(spdk_bdev_set_timeout(desc, 30, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4486 	CU_ASSERT(spdk_bdev_write_blocks(desc, io_ch, (void *)0x1000, 0, 1, io_done, NULL) == 0);
4487 
4488 	/* Don't reach the limit */
4489 	spdk_delay_us(15 * spdk_get_ticks_hz());
4490 	poll_threads();
4491 	CU_ASSERT(cb_arg.type == 0);
4492 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x0);
4493 	CU_ASSERT(cb_arg.iov.iov_len == 0);
4494 
4495 	/* 15 + 15 = 30 seconds, which reaches the limit */
4496 	spdk_delay_us(15 * spdk_get_ticks_hz());
4497 	poll_threads();
4498 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_WRITE);
4499 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x1000);
4500 	CU_ASSERT(cb_arg.iov.iov_len == 1 * bdev->blocklen);
4501 	stub_complete_io(1);
4502 
4503 	/* Use the same split IO above and check the IO */
4504 	memset(&cb_arg, 0, sizeof(cb_arg));
4505 	CU_ASSERT(spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL) == 0);
4506 
4507 	/* The first child completes in time */
4508 	spdk_delay_us(15 * spdk_get_ticks_hz());
4509 	poll_threads();
4510 	stub_complete_io(1);
4511 	CU_ASSERT(cb_arg.type == 0);
4512 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x0);
4513 	CU_ASSERT(cb_arg.iov.iov_len == 0);
4514 
4515 	/* The second child reaches the limit */
4516 	spdk_delay_us(15 * spdk_get_ticks_hz());
4517 	poll_threads();
4518 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_WRITE);
4519 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0xF000);
4520 	CU_ASSERT(cb_arg.iov.iov_len == 8 * bdev->blocklen);
4521 	stub_complete_io(1);
4522 
4523 	/* Also include the reset IO */
4524 	memset(&cb_arg, 0, sizeof(cb_arg));
4525 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4526 	spdk_delay_us(30 * spdk_get_ticks_hz());
4527 	poll_threads();
4528 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_RESET);
4529 	stub_complete_io(1);
4530 	poll_threads();
4531 
4532 	spdk_put_io_channel(io_ch);
4533 	spdk_bdev_close(desc);
4534 	free_bdev(bdev);
4535 	ut_fini_bdev();
4536 }
4537 
4538 static void
4539 bdev_set_qd_sampling(void)
4540 {
4541 	struct spdk_bdev *bdev;
4542 	struct spdk_bdev_desc *desc = NULL;
4543 	struct spdk_io_channel *io_ch = NULL;
4544 	struct spdk_bdev_channel *bdev_ch = NULL;
4545 	struct timeout_io_cb_arg cb_arg;
4546 
4547 	ut_init_bdev(NULL);
4548 	bdev = allocate_bdev("bdev");
4549 
4550 	CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
4551 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4552 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4553 
4554 	io_ch = spdk_bdev_get_io_channel(desc);
4555 	CU_ASSERT(io_ch != NULL);
4556 
4557 	bdev_ch = spdk_io_channel_get_ctx(io_ch);
4558 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
4559 
4560 	/* This is part 1.
4561 	 * Check the bdev_ch->io_submitted list to make sure that it links
4562 	 * only the user-submitted I/Os.
4563 	 */
4564 	CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
4565 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4566 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4567 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4568 	stub_complete_io(1);
4569 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4570 	stub_complete_io(1);
4571 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4572 
4573 	/* This is part 2.
4574 	 * Test the bdev's QD sampling poller registration.
4575 	 */
4576 	/* 1st Successfully set the qd sampling period */
4577 	spdk_bdev_set_qd_sampling_period(bdev, 10);
4578 	CU_ASSERT(bdev->internal.new_period == 10);
4579 	CU_ASSERT(bdev->internal.period == 10);
4580 	CU_ASSERT(bdev->internal.qd_desc != NULL);
4581 	poll_threads();
4582 	CU_ASSERT(bdev->internal.qd_poller != NULL);
4583 
4584 	/* 2nd Change the qd sampling period */
4585 	spdk_bdev_set_qd_sampling_period(bdev, 20);
4586 	CU_ASSERT(bdev->internal.new_period == 20);
4587 	CU_ASSERT(bdev->internal.period == 10);
4588 	CU_ASSERT(bdev->internal.qd_desc != NULL);
4589 	poll_threads();
4590 	CU_ASSERT(bdev->internal.qd_poller != NULL);
4591 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4592 
4593 	/* 3rd Change the qd sampling period and verify qd_poll_in_progress */
4594 	spdk_delay_us(20);
4595 	poll_thread_times(0, 1);
4596 	CU_ASSERT(bdev->internal.qd_poll_in_progress == true);
4597 	spdk_bdev_set_qd_sampling_period(bdev, 30);
4598 	CU_ASSERT(bdev->internal.new_period == 30);
4599 	CU_ASSERT(bdev->internal.period == 20);
4600 	poll_threads();
4601 	CU_ASSERT(bdev->internal.qd_poll_in_progress == false);
4602 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4603 
4604 	/* 4th Disable the qd sampling period */
4605 	spdk_bdev_set_qd_sampling_period(bdev, 0);
4606 	CU_ASSERT(bdev->internal.new_period == 0);
4607 	CU_ASSERT(bdev->internal.period == 30);
4608 	poll_threads();
4609 	CU_ASSERT(bdev->internal.qd_poller == NULL);
4610 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4611 	CU_ASSERT(bdev->internal.qd_desc == NULL);
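	/* Note how the new period is staged in internal.new_period and copied
	 * into internal.period only when the poller runs; if a QD poll is in
	 * progress, the update is deferred until that poll finishes.
	 */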
4612 
4613 	/* This is part 3.
4614 	 * Verify that submitted I/O and reset work properly
4615 	 * with QD sampling enabled.
4616 	 */
4617 	memset(&cb_arg, 0, sizeof(cb_arg));
4618 	spdk_bdev_set_qd_sampling_period(bdev, 1);
4619 	poll_threads();
4620 
4621 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4622 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4623 
4624 	/* Also include the reset IO */
4625 	memset(&cb_arg, 0, sizeof(cb_arg));
4626 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4627 	poll_threads();
4628 
4629 	/* Close the desc */
4630 	spdk_put_io_channel(io_ch);
4631 	spdk_bdev_close(desc);
4632 
4633 	/* Complete the submitted IO and reset */
4634 	stub_complete_io(2);
4635 	poll_threads();
4636 
4637 	free_bdev(bdev);
4638 	ut_fini_bdev();
4639 }
4640 
4641 static void
4642 lba_range_overlap(void)
4643 {
4644 	struct lba_range r1, r2;
4645 
4646 	r1.offset = 100;
4647 	r1.length = 50;
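	/* r1 covers LBAs [100, 149].  Two ranges overlap iff each one starts
	 * before the other ends; a zero-length range never overlaps anything.
	 */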
4648 
4649 	r2.offset = 0;
4650 	r2.length = 1;
4651 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4652 
4653 	r2.offset = 0;
4654 	r2.length = 100;
4655 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4656 
4657 	r2.offset = 0;
4658 	r2.length = 110;
4659 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4660 
4661 	r2.offset = 100;
4662 	r2.length = 10;
4663 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4664 
4665 	r2.offset = 110;
4666 	r2.length = 20;
4667 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4668 
4669 	r2.offset = 140;
4670 	r2.length = 150;
4671 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4672 
4673 	r2.offset = 130;
4674 	r2.length = 200;
4675 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4676 
4677 	r2.offset = 150;
4678 	r2.length = 100;
4679 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4680 
4681 	r2.offset = 110;
4682 	r2.length = 0;
4683 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4684 }
4685 
4686 static bool g_lock_lba_range_done;
4687 static bool g_unlock_lba_range_done;
4688 
4689 static void
4690 lock_lba_range_done(void *ctx, int status)
4691 {
4692 	g_lock_lba_range_done = true;
4693 }
4694 
4695 static void
4696 unlock_lba_range_done(void *ctx, int status)
4697 {
4698 	g_unlock_lba_range_done = true;
4699 }
4700 
4701 static void
4702 lock_lba_range_check_ranges(void)
4703 {
4704 	struct spdk_bdev *bdev;
4705 	struct spdk_bdev_desc *desc = NULL;
4706 	struct spdk_io_channel *io_ch;
4707 	struct spdk_bdev_channel *channel;
4708 	struct lba_range *range;
4709 	int ctx1;
4710 	int rc;
4711 
4712 	ut_init_bdev(NULL);
4713 	bdev = allocate_bdev("bdev0");
4714 
4715 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4716 	CU_ASSERT(rc == 0);
4717 	CU_ASSERT(desc != NULL);
4718 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4719 	io_ch = spdk_bdev_get_io_channel(desc);
4720 	CU_ASSERT(io_ch != NULL);
4721 	channel = spdk_io_channel_get_ctx(io_ch);
4722 
4723 	g_lock_lba_range_done = false;
4724 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4725 	CU_ASSERT(rc == 0);
4726 	poll_threads();
4727 
4728 	CU_ASSERT(g_lock_lba_range_done == true);
4729 	range = TAILQ_FIRST(&channel->locked_ranges);
4730 	SPDK_CU_ASSERT_FATAL(range != NULL);
4731 	CU_ASSERT(range->offset == 20);
4732 	CU_ASSERT(range->length == 10);
4733 	CU_ASSERT(range->owner_ch == channel);
4734 
4735 	/* Unlocks must exactly match a lock. */
4736 	g_unlock_lba_range_done = false;
4737 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 1, unlock_lba_range_done, &ctx1);
4738 	CU_ASSERT(rc == -EINVAL);
4739 	CU_ASSERT(g_unlock_lba_range_done == false);
4740 
4741 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4742 	CU_ASSERT(rc == 0);
4743 	spdk_delay_us(100);
4744 	poll_threads();
4745 
4746 	CU_ASSERT(g_unlock_lba_range_done == true);
4747 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4748 
4749 	spdk_put_io_channel(io_ch);
4750 	spdk_bdev_close(desc);
4751 	free_bdev(bdev);
4752 	ut_fini_bdev();
4753 }
4754 
4755 static void
4756 lock_lba_range_with_io_outstanding(void)
4757 {
4758 	struct spdk_bdev *bdev;
4759 	struct spdk_bdev_desc *desc = NULL;
4760 	struct spdk_io_channel *io_ch;
4761 	struct spdk_bdev_channel *channel;
4762 	struct lba_range *range;
4763 	char buf[4096];
4764 	int ctx1;
4765 	int rc;
4766 
4767 	ut_init_bdev(NULL);
4768 	bdev = allocate_bdev("bdev0");
4769 
4770 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4771 	CU_ASSERT(rc == 0);
4772 	CU_ASSERT(desc != NULL);
4773 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4774 	io_ch = spdk_bdev_get_io_channel(desc);
4775 	CU_ASSERT(io_ch != NULL);
4776 	channel = spdk_io_channel_get_ctx(io_ch);
4777 
4778 	g_io_done = false;
4779 	rc = spdk_bdev_read_blocks(desc, io_ch, buf, 20, 1, io_done, &ctx1);
4780 	CU_ASSERT(rc == 0);
4781 
4782 	g_lock_lba_range_done = false;
4783 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4784 	CU_ASSERT(rc == 0);
4785 	poll_threads();
4786 
4787 	/* The lock should immediately become valid, since there are no outstanding
4788 	 * write I/O.
4789 	 */
4790 	CU_ASSERT(g_io_done == false);
4791 	CU_ASSERT(g_lock_lba_range_done == true);
4792 	range = TAILQ_FIRST(&channel->locked_ranges);
4793 	SPDK_CU_ASSERT_FATAL(range != NULL);
4794 	CU_ASSERT(range->offset == 20);
4795 	CU_ASSERT(range->length == 10);
4796 	CU_ASSERT(range->owner_ch == channel);
4797 	CU_ASSERT(range->locked_ctx == &ctx1);
4798 
4799 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4800 	CU_ASSERT(rc == 0);
4801 	stub_complete_io(1);
4802 	spdk_delay_us(100);
4803 	poll_threads();
4804 
4805 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4806 
4807 	/* Now try again, but with a write I/O. */
4808 	g_io_done = false;
4809 	rc = spdk_bdev_write_blocks(desc, io_ch, buf, 20, 1, io_done, &ctx1);
4810 	CU_ASSERT(rc == 0);
4811 
4812 	g_lock_lba_range_done = false;
4813 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4814 	CU_ASSERT(rc == 0);
4815 	poll_threads();
4816 
4817 	/* The lock should not be fully valid yet, since a write I/O is outstanding.
4818 	 * But note that the range should already be on the channel's locked_ranges
4819 	 * list, to make sure no new write I/Os are started.
4820 	 */
4821 	CU_ASSERT(g_io_done == false);
4822 	CU_ASSERT(g_lock_lba_range_done == false);
4823 	range = TAILQ_FIRST(&channel->locked_ranges);
4824 	SPDK_CU_ASSERT_FATAL(range != NULL);
4825 	CU_ASSERT(range->offset == 20);
4826 	CU_ASSERT(range->length == 10);
4827 
4828 	/* Complete the write I/O.  This should make the lock valid (checked by confirming
4829 	 * our callback was invoked).
4830 	 */
4831 	stub_complete_io(1);
4832 	spdk_delay_us(100);
4833 	poll_threads();
4834 	CU_ASSERT(g_io_done == true);
4835 	CU_ASSERT(g_lock_lba_range_done == true);
4836 
4837 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4838 	CU_ASSERT(rc == 0);
4839 	poll_threads();
4840 
4841 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4842 
4843 	spdk_put_io_channel(io_ch);
4844 	spdk_bdev_close(desc);
4845 	free_bdev(bdev);
4846 	ut_fini_bdev();
4847 }
4848 
4849 static void
4850 lock_lba_range_overlapped(void)
4851 {
4852 	struct spdk_bdev *bdev;
4853 	struct spdk_bdev_desc *desc = NULL;
4854 	struct spdk_io_channel *io_ch;
4855 	struct spdk_bdev_channel *channel;
4856 	struct lba_range *range;
4857 	int ctx1;
4858 	int rc;
4859 
4860 	ut_init_bdev(NULL);
4861 	bdev = allocate_bdev("bdev0");
4862 
4863 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4864 	CU_ASSERT(rc == 0);
4865 	CU_ASSERT(desc != NULL);
4866 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4867 	io_ch = spdk_bdev_get_io_channel(desc);
4868 	CU_ASSERT(io_ch != NULL);
4869 	channel = spdk_io_channel_get_ctx(io_ch);
4870 
4871 	/* Lock range 20-29. */
4872 	g_lock_lba_range_done = false;
4873 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4874 	CU_ASSERT(rc == 0);
4875 	poll_threads();
4876 
4877 	CU_ASSERT(g_lock_lba_range_done == true);
4878 	range = TAILQ_FIRST(&channel->locked_ranges);
4879 	SPDK_CU_ASSERT_FATAL(range != NULL);
4880 	CU_ASSERT(range->offset == 20);
4881 	CU_ASSERT(range->length == 10);
4882 
4883 	/* Try to lock range 25-39.  It should not lock immediately, since it overlaps with
4884 	 * 20-29.
4885 	 */
4886 	g_lock_lba_range_done = false;
4887 	rc = bdev_lock_lba_range(desc, io_ch, 25, 15, lock_lba_range_done, &ctx1);
4888 	CU_ASSERT(rc == 0);
4889 	poll_threads();
4890 
4891 	CU_ASSERT(g_lock_lba_range_done == false);
4892 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4893 	SPDK_CU_ASSERT_FATAL(range != NULL);
4894 	CU_ASSERT(range->offset == 25);
4895 	CU_ASSERT(range->length == 15);
4896 
4897 	/* Unlock 20-29.  This should result in range 25-39 now getting locked since it
4898 	 * no longer overlaps with an active lock.
4899 	 */
4900 	g_unlock_lba_range_done = false;
4901 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4902 	CU_ASSERT(rc == 0);
4903 	poll_threads();
4904 
4905 	CU_ASSERT(g_unlock_lba_range_done == true);
4906 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.pending_locked_ranges));
4907 	range = TAILQ_FIRST(&channel->locked_ranges);
4908 	SPDK_CU_ASSERT_FATAL(range != NULL);
4909 	CU_ASSERT(range->offset == 25);
4910 	CU_ASSERT(range->length == 15);
4911 
4912 	/* Lock 40-59.  This should immediately lock since it does not overlap with the
4913 	 * currently active 25-39 lock.
4914 	 */
4915 	g_lock_lba_range_done = false;
4916 	rc = bdev_lock_lba_range(desc, io_ch, 40, 20, lock_lba_range_done, &ctx1);
4917 	CU_ASSERT(rc == 0);
4918 	poll_threads();
4919 
4920 	CU_ASSERT(g_lock_lba_range_done == true);
4921 	range = TAILQ_FIRST(&bdev->internal.locked_ranges);
4922 	SPDK_CU_ASSERT_FATAL(range != NULL);
4923 	range = TAILQ_NEXT(range, tailq);
4924 	SPDK_CU_ASSERT_FATAL(range != NULL);
4925 	CU_ASSERT(range->offset == 40);
4926 	CU_ASSERT(range->length == 20);
4927 
4928 	/* Try to lock 35-44.  Note that this overlaps with both 25-39 and 40-59. */
4929 	g_lock_lba_range_done = false;
4930 	rc = bdev_lock_lba_range(desc, io_ch, 35, 10, lock_lba_range_done, &ctx1);
4931 	CU_ASSERT(rc == 0);
4932 	poll_threads();
4933 
4934 	CU_ASSERT(g_lock_lba_range_done == false);
4935 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4936 	SPDK_CU_ASSERT_FATAL(range != NULL);
4937 	CU_ASSERT(range->offset == 35);
4938 	CU_ASSERT(range->length == 10);
4939 
4940 	/* Unlock 25-39.  Make sure that 35-44 is still in the pending list, since
4941 	 * the 40-59 lock is still active.
4942 	 */
4943 	g_unlock_lba_range_done = false;
4944 	rc = bdev_unlock_lba_range(desc, io_ch, 25, 15, unlock_lba_range_done, &ctx1);
4945 	CU_ASSERT(rc == 0);
4946 	poll_threads();
4947 
4948 	CU_ASSERT(g_unlock_lba_range_done == true);
4949 	CU_ASSERT(g_lock_lba_range_done == false);
4950 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4951 	SPDK_CU_ASSERT_FATAL(range != NULL);
4952 	CU_ASSERT(range->offset == 35);
4953 	CU_ASSERT(range->length == 10);
4954 
4955 	/* Unlock 40-59.  This should result in 35-44 now getting locked, since there are
4956 	 * no longer any active overlapping locks.
4957 	 */
4958 	g_unlock_lba_range_done = false;
4959 	rc = bdev_unlock_lba_range(desc, io_ch, 40, 20, unlock_lba_range_done, &ctx1);
4960 	CU_ASSERT(rc == 0);
4961 	poll_threads();
4962 
4963 	CU_ASSERT(g_unlock_lba_range_done == true);
4964 	CU_ASSERT(g_lock_lba_range_done == true);
4965 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.pending_locked_ranges));
4966 	range = TAILQ_FIRST(&bdev->internal.locked_ranges);
4967 	SPDK_CU_ASSERT_FATAL(range != NULL);
4968 	CU_ASSERT(range->offset == 35);
4969 	CU_ASSERT(range->length == 10);
4970 
4971 	/* Finally, unlock 35-44. */
4972 	g_unlock_lba_range_done = false;
4973 	rc = bdev_unlock_lba_range(desc, io_ch, 35, 10, unlock_lba_range_done, &ctx1);
4974 	CU_ASSERT(rc == 0);
4975 	poll_threads();
4976 
4977 	CU_ASSERT(g_unlock_lba_range_done == true);
4978 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.locked_ranges));
4979 
4980 	spdk_put_io_channel(io_ch);
4981 	spdk_bdev_close(desc);
4982 	free_bdev(bdev);
4983 	ut_fini_bdev();
4984 }
4985 
4986 static void
4987 abort_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
4988 {
4989 	g_abort_done = true;
4990 	g_abort_status = bdev_io->internal.status;
4991 	spdk_bdev_free_io(bdev_io);
4992 }
4993 
4994 static void
4995 bdev_io_abort(void)
4996 {
4997 	struct spdk_bdev *bdev;
4998 	struct spdk_bdev_desc *desc = NULL;
4999 	struct spdk_io_channel *io_ch;
5000 	struct spdk_bdev_channel *channel;
5001 	struct spdk_bdev_mgmt_channel *mgmt_ch;
5002 	struct spdk_bdev_opts bdev_opts = {};
5003 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
5004 	uint64_t io_ctx1 = 0, io_ctx2 = 0, i;
5005 	int rc;
5006 
5007 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5008 	bdev_opts.bdev_io_pool_size = 7;
5009 	bdev_opts.bdev_io_cache_size = 2;
5010 	ut_init_bdev(&bdev_opts);
5011 
5012 	bdev = allocate_bdev("bdev0");
5013 
5014 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5015 	CU_ASSERT(rc == 0);
5016 	CU_ASSERT(desc != NULL);
5017 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5018 	io_ch = spdk_bdev_get_io_channel(desc);
5019 	CU_ASSERT(io_ch != NULL);
5020 	channel = spdk_io_channel_get_ctx(io_ch);
5021 	mgmt_ch = channel->shared_resource->mgmt_ch;
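	/* The deliberately small bdev_io pool (7 entries, cache of 2) matters
	 * later in this test: child abort requests exhaust the pool and must
	 * wait on the management channel's io_wait_queue.
	 */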
5022 
5023 	g_abort_done = false;
5024 
5025 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, false);
5026 
5027 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5028 	CU_ASSERT(rc == -ENOTSUP);
5029 
5030 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, true);
5031 
5032 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx2, abort_done, NULL);
5033 	CU_ASSERT(rc == 0);
5034 	CU_ASSERT(g_abort_done == true);
5035 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_FAILED);
5036 
5037 	/* Test the case that the target I/O was successfully aborted. */
5038 	g_io_done = false;
5039 
5040 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1);
5041 	CU_ASSERT(rc == 0);
5042 	CU_ASSERT(g_io_done == false);
5043 
5044 	g_abort_done = false;
5045 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5046 
5047 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5048 	CU_ASSERT(rc == 0);
5049 	CU_ASSERT(g_io_done == true);
5050 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5051 	stub_complete_io(1);
5052 	CU_ASSERT(g_abort_done == true);
5053 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5054 
5055 	/* Test the case where the target I/O is not aborted because it completes
5056 	 * while the abort is being executed.
5057 	 */
5058 	g_io_done = false;
5059 
5060 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1);
5061 	CU_ASSERT(rc == 0);
5062 	CU_ASSERT(g_io_done == false);
5063 
5064 	g_abort_done = false;
5065 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
5066 
5067 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5068 	CU_ASSERT(rc == 0);
5069 	CU_ASSERT(g_io_done == false);
5070 
5071 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5072 	stub_complete_io(1);
5073 	CU_ASSERT(g_io_done == true);
5074 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5075 
5076 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
5077 	stub_complete_io(1);
5078 	CU_ASSERT(g_abort_done == true);
5079 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5080 
5081 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5082 
5083 	bdev->optimal_io_boundary = 16;
5084 	bdev->split_on_optimal_io_boundary = true;
5085 
5086 	/* Test that a single-vector command which is split is aborted correctly.
5087 	 * Offset 14, length 8, payload 0xF000
5088 	 *  Child - Offset 14, length 2, payload 0xF000
5089 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
5090 	 */
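	/* Note: aborting the parent I/O is expected to abort both outstanding
	 * child I/Os, which is why two stub completions are needed below before
	 * the abort itself completes.
	 */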
5091 	g_io_done = false;
5092 
5093 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, &io_ctx1);
5094 	CU_ASSERT(rc == 0);
5095 	CU_ASSERT(g_io_done == false);
5096 
5097 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5098 
5099 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5100 
5101 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5102 	CU_ASSERT(rc == 0);
5103 	CU_ASSERT(g_io_done == true);
5104 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5105 	stub_complete_io(2);
5106 	CU_ASSERT(g_abort_done == true);
5107 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5108 
5109 	/* Test that a multi-vector command that needs to be split by stripe is
5110 	 * aborted correctly. The abort is requested before the second child I/O
5111 	 * is submitted, so the parent I/O should complete with failure without
5112 	 * submitting the second child I/O.
5113 	 */
5114 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV * 2; i++) {
5115 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
5116 		iov[i].iov_len = 512;
5117 	}
5118 
5119 	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
5120 	g_io_done = false;
5121 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, 0,
5122 				    SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, &io_ctx1);
5123 	CU_ASSERT(rc == 0);
5124 	CU_ASSERT(g_io_done == false);
5125 
5126 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5127 
5128 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5129 
5130 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5131 	CU_ASSERT(rc == 0);
5132 	CU_ASSERT(g_io_done == true);
5133 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5134 	stub_complete_io(1);
5135 	CU_ASSERT(g_abort_done == true);
5136 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5137 
5138 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5139 
5140 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5141 
5142 	bdev->optimal_io_boundary = 16;
5143 	g_io_done = false;
5144 
5145 	/* Test that a single-vector command which is split is aborted correctly.
5146 	 * Unlike the cases above, the child abort requests are submitted
5147 	 * sequentially because of the limited capacity of the spdk_bdev_io pool.
5148 	 */
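	/* Split arithmetic: 50 blocks starting at LBA 14 with a boundary of 16
	 * split into four children of 2, 16, 16, and 16 blocks.
	 */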
5149 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 50, io_done, &io_ctx1);
5150 	CU_ASSERT(rc == 0);
5151 	CU_ASSERT(g_io_done == false);
5152 
5153 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
5154 
5155 	g_abort_done = false;
5156 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5157 
5158 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5159 	CU_ASSERT(rc == 0);
5160 	CU_ASSERT(!TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
5161 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
5162 
5163 	stub_complete_io(1);
5164 	CU_ASSERT(g_io_done == true);
5165 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5166 	stub_complete_io(3);
5167 	CU_ASSERT(g_abort_done == true);
5168 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5169 
5170 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5171 
5172 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5173 
5174 	spdk_put_io_channel(io_ch);
5175 	spdk_bdev_close(desc);
5176 	free_bdev(bdev);
5177 	ut_fini_bdev();
5178 }
5179 
5180 static void
5181 bdev_unmap(void)
5182 {
5183 	struct spdk_bdev *bdev;
5184 	struct spdk_bdev_desc *desc = NULL;
5185 	struct spdk_io_channel *ioch;
5186 	struct spdk_bdev_channel *bdev_ch;
5187 	struct ut_expected_io *expected_io;
5188 	struct spdk_bdev_opts bdev_opts = {};
5189 	uint32_t i, num_outstanding;
5190 	uint64_t offset, num_blocks, max_unmap_blocks, num_children;
5191 	int rc;
5192 
5193 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5194 	bdev_opts.bdev_io_pool_size = 512;
5195 	bdev_opts.bdev_io_cache_size = 64;
5196 	ut_init_bdev(&bdev_opts);
5197 
5198 	bdev = allocate_bdev("bdev");
5199 
5200 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
5201 	CU_ASSERT_EQUAL(rc, 0);
5202 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5203 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5204 	ioch = spdk_bdev_get_io_channel(desc);
5205 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
5206 	bdev_ch = spdk_io_channel_get_ctx(ioch);
5207 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
5208 
5209 	fn_table.submit_request = stub_submit_request;
5210 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5211 
5212 	/* Case 1: First test that the request won't be split */
5213 	num_blocks = 32;
5214 
5215 	g_io_done = false;
5216 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 0, num_blocks, 0);
5217 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5218 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5219 	CU_ASSERT_EQUAL(rc, 0);
5220 	CU_ASSERT(g_io_done == false);
5221 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5222 	stub_complete_io(1);
5223 	CU_ASSERT(g_io_done == true);
5224 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5225 
5226 	/* Case 2: Test the split with 2 child requests */
5227 	bdev->max_unmap = 8;
5228 	bdev->max_unmap_segments = 2;
5229 	max_unmap_blocks = bdev->max_unmap * bdev->max_unmap_segments;
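	/* Each child unmap covers max_unmap * max_unmap_segments = 8 * 2 = 16
	 * blocks, so 32 blocks split into exactly 2 children.
	 */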
5230 	num_blocks = max_unmap_blocks * 2;
5231 	offset = 0;
5232 
5233 	g_io_done = false;
5234 	for (i = 0; i < 2; i++) {
5235 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, offset, max_unmap_blocks, 0);
5236 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5237 		offset += max_unmap_blocks;
5238 	}
5239 
5240 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5241 	CU_ASSERT_EQUAL(rc, 0);
5242 	CU_ASSERT(g_io_done == false);
5243 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5244 	stub_complete_io(2);
5245 	CU_ASSERT(g_io_done == true);
5246 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5247 
5248 	/* Case 3: Test the split with 15 child requests; the first batch of 8 finishes first */
5249 	num_children = 15;
5250 	num_blocks = max_unmap_blocks * num_children;
5251 	g_io_done = false;
5252 	offset = 0;
5253 	for (i = 0; i < num_children; i++) {
5254 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, offset, max_unmap_blocks, 0);
5255 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5256 		offset += max_unmap_blocks;
5257 	}
5258 
5259 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5260 	CU_ASSERT_EQUAL(rc, 0);
5261 	CU_ASSERT(g_io_done == false);
5262 
5263 	while (num_children > 0) {
5264 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
5265 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
5266 		stub_complete_io(num_outstanding);
5267 		num_children -= num_outstanding;
5268 	}
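	/* SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS caps the number of
	 * outstanding children, so the 15 children complete as a batch of 8
	 * followed by a batch of 7.
	 */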
5269 	CU_ASSERT(g_io_done == true);
5270 
5271 	spdk_put_io_channel(ioch);
5272 	spdk_bdev_close(desc);
5273 	free_bdev(bdev);
5274 	ut_fini_bdev();
5275 }
5276 
5277 static void
5278 bdev_write_zeroes_split_test(void)
5279 {
5280 	struct spdk_bdev *bdev;
5281 	struct spdk_bdev_desc *desc = NULL;
5282 	struct spdk_io_channel *ioch;
5283 	struct spdk_bdev_channel *bdev_ch;
5284 	struct ut_expected_io *expected_io;
5285 	struct spdk_bdev_opts bdev_opts = {};
5286 	uint32_t i, num_outstanding;
5287 	uint64_t offset, num_blocks, max_write_zeroes_blocks, num_children;
5288 	int rc;
5289 
5290 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5291 	bdev_opts.bdev_io_pool_size = 512;
5292 	bdev_opts.bdev_io_cache_size = 64;
5293 	ut_init_bdev(&bdev_opts);
5294 
5295 	bdev = allocate_bdev("bdev");
5296 
5297 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
5298 	CU_ASSERT_EQUAL(rc, 0);
5299 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5300 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5301 	ioch = spdk_bdev_get_io_channel(desc);
5302 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
5303 	bdev_ch = spdk_io_channel_get_ctx(ioch);
5304 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
5305 
5306 	fn_table.submit_request = stub_submit_request;
5307 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5308 
5309 	/* Case 1: First test that the request won't be split */
5310 	num_blocks = 32;
5311 
5312 	g_io_done = false;
5313 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0);
5314 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5315 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5316 	CU_ASSERT_EQUAL(rc, 0);
5317 	CU_ASSERT(g_io_done == false);
5318 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5319 	stub_complete_io(1);
5320 	CU_ASSERT(g_io_done == true);
5321 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5322 
5323 	/* Case 2: Test the split with 2 child requests */
5324 	max_write_zeroes_blocks = 8;
5325 	bdev->max_write_zeroes = max_write_zeroes_blocks;
5326 	num_blocks = max_write_zeroes_blocks * 2;
5327 	offset = 0;
5328 
5329 	g_io_done = false;
5330 	for (i = 0; i < 2; i++) {
5331 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
5332 						   0);
5333 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5334 		offset += max_write_zeroes_blocks;
5335 	}
5336 
5337 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5338 	CU_ASSERT_EQUAL(rc, 0);
5339 	CU_ASSERT(g_io_done == false);
5340 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5341 	stub_complete_io(2);
5342 	CU_ASSERT(g_io_done == true);
5343 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5344 
5345 	/* Case 3: Test the split with 15 child requests; the first batch of 8 finishes first */
5346 	num_children = 15;
5347 	num_blocks = max_write_zeroes_blocks * num_children;
5348 	g_io_done = false;
5349 	offset = 0;
5350 	for (i = 0; i < num_children; i++) {
5351 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
5352 						   0);
5353 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5354 		offset += max_write_zeroes_blocks;
5355 	}
5356 
5357 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5358 	CU_ASSERT_EQUAL(rc, 0);
5359 	CU_ASSERT(g_io_done == false);
5360 
5361 	while (num_children > 0) {
5362 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
5363 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
5364 		stub_complete_io(num_outstanding);
5365 		num_children -= num_outstanding;
5366 	}
5367 	CU_ASSERT(g_io_done == true);
5368 
5369 	spdk_put_io_channel(ioch);
5370 	spdk_bdev_close(desc);
5371 	free_bdev(bdev);
5372 	ut_fini_bdev();
5373 }
5374 
5375 static void
5376 bdev_set_options_test(void)
5377 {
5378 	struct spdk_bdev_opts bdev_opts = {};
5379 	int rc;
5380 
5381 	/* Case 1: opts_size is not set */
5382 	rc = spdk_bdev_set_opts(&bdev_opts);
5383 	CU_ASSERT(rc == -1);
5384 
5385 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5386 	bdev_opts.bdev_io_pool_size = 4;
5387 	bdev_opts.bdev_io_cache_size = 2;
5388 	bdev_opts.small_buf_pool_size = 4;
5389 
5390 	/* Case 2: small_buf_pool_size is below the minimum, so the opts are rejected */
5391 	rc = spdk_bdev_set_opts(&bdev_opts);
5392 	CU_ASSERT(rc == -1);
5393 
5394 	/* Case 3: large_buf_pool_size is below the minimum, so the opts are rejected */
5395 	bdev_opts.small_buf_pool_size = BUF_SMALL_POOL_SIZE;
5396 	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE - 1;
5397 	rc = spdk_bdev_set_opts(&bdev_opts);
5398 	CU_ASSERT(rc == -1);
5399 
5400 	/* Case 4: Set a valid large_buf_pool_size */
5401 	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE;
5402 	rc = spdk_bdev_set_opts(&bdev_opts);
5403 	CU_ASSERT(rc == 0);
5404 
5405 	/* Case 5: Set different valid values for the small and large buf pools */
5406 	bdev_opts.small_buf_pool_size = BUF_SMALL_POOL_SIZE + 3;
5407 	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE + 3;
5408 	rc = spdk_bdev_set_opts(&bdev_opts);
5409 	CU_ASSERT(rc == 0);
5410 }
5411 
5412 static uint64_t
5413 get_ns_time(void)
5414 {
5415 	int rc;
5416 	struct timespec ts;
5417 
5418 	rc = clock_gettime(CLOCK_MONOTONIC, &ts);
5419 	CU_ASSERT(rc == 0);
5420 	return (uint64_t)ts.tv_sec * 1000ULL * 1000ULL * 1000ULL + ts.tv_nsec;
5421 }
5422 
5423 static int
5424 rb_tree_get_height(struct spdk_bdev_name *bdev_name)
5425 {
5426 	int h1, h2;
5427 
5428 	if (bdev_name == NULL) {
5429 		return -1;
5430 	} else {
5431 		h1 = rb_tree_get_height(RB_LEFT(bdev_name, node));
5432 		h2 = rb_tree_get_height(RB_RIGHT(bdev_name, node));
5433 
5434 		return spdk_max(h1, h2) + 1;
5435 	}
5436 }
5437 
5438 static void
5439 bdev_multi_allocation(void)
5440 {
5441 	const int max_bdev_num = 1024 * 16;
5442 	char name[max_bdev_num][16];
5443 	char noexist_name[] = "invalid_bdev";
5444 	struct spdk_bdev *bdev[max_bdev_num];
5445 	int i, j;
5446 	uint64_t last_time;
5447 	int bdev_num;
5448 	int height;
5449 
5450 	for (j = 0; j < max_bdev_num; j++) {
5451 		snprintf(name[j], sizeof(name[j]), "bdev%d", j);
5452 	}
5453 
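	/* Allocate bdevs in growing batches (1K up to 16K) and verify that the
	 * bdev name lookup RB-tree stays balanced: with j + 1 names inserted,
	 * its height must not exceed log2(2 * (j + 1)).
	 */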
5454 	for (i = 0; i < 16; i++) {
5455 		last_time = get_ns_time();
5456 		bdev_num = 1024 * (i + 1);
5457 		for (j = 0; j < bdev_num; j++) {
5458 			bdev[j] = allocate_bdev(name[j]);
5459 			height = rb_tree_get_height(&bdev[j]->internal.bdev_name);
5460 			CU_ASSERT(height <= (int)(spdk_u32log2(2 * j + 2)));
5461 		}
5462 		SPDK_NOTICELOG("allocating %d bdevs took %" PRIu64 " ms\n", bdev_num,
5463 			       (get_ns_time() - last_time) / 1000 / 1000);
5464 		for (j = 0; j < bdev_num; j++) {
5465 			CU_ASSERT(spdk_bdev_get_by_name(name[j]) != NULL);
5466 		}
5467 		CU_ASSERT(spdk_bdev_get_by_name(noexist_name) == NULL);
5468 
5469 		for (j = 0; j < bdev_num; j++) {
5470 			free_bdev(bdev[j]);
5471 		}
5472 		for (j = 0; j < bdev_num; j++) {
5473 			CU_ASSERT(spdk_bdev_get_by_name(name[j]) == NULL);
5474 		}
5475 	}
5476 }
5477 
5478 static struct spdk_memory_domain *g_bdev_memory_domain = (struct spdk_memory_domain *) 0xf00df00d;
5479 
5480 static int
5481 test_bdev_get_supported_dma_device_types_op(void *ctx, struct spdk_memory_domain **domains,
5482 		int array_size)
5483 {
5484 	if (array_size > 0 && domains) {
5485 		domains[0] = g_bdev_memory_domain;
5486 	}
5487 
5488 	return 1;
5489 }
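/* The stub reports the total number of supported memory domains even when the
 * caller's array is NULL or too small, mirroring the contract of
 * spdk_bdev_get_memory_domains() that is verified below.
 */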
5490 
5491 static void
5492 bdev_get_memory_domains(void)
5493 {
5494 	struct spdk_bdev_fn_table fn_table = {
5495 		.get_memory_domains = test_bdev_get_supported_dma_device_types_op
5496 	};
5497 	struct spdk_bdev bdev = { .fn_table = &fn_table };
5498 	struct spdk_memory_domain *domains[2] = {};
5499 	int rc;
5500 
5501 	/* bdev is NULL */
5502 	rc = spdk_bdev_get_memory_domains(NULL, domains, 2);
5503 	CU_ASSERT(rc == -EINVAL);
5504 
5505 	/* domains is NULL */
5506 	rc = spdk_bdev_get_memory_domains(&bdev, NULL, 2);
5507 	CU_ASSERT(rc == 1);
5508 
5509 	/* array size is 0 */
5510 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 0);
5511 	CU_ASSERT(rc == 1);
5512 
5513 	/* get_supported_dma_device_types op is set */
5514 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 2);
5515 	CU_ASSERT(rc == 1);
5516 	CU_ASSERT(domains[0] == g_bdev_memory_domain);
5517 
5518 	/* get_supported_dma_device_types op is not set */
5519 	fn_table.get_memory_domains = NULL;
5520 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 2);
5521 	CU_ASSERT(rc == 0);
5522 }
5523 
5524 static void
5525 _bdev_io_ext(struct spdk_bdev_ext_io_opts *ext_io_opts)
5526 {
5527 	struct spdk_bdev *bdev;
5528 	struct spdk_bdev_desc *desc = NULL;
5529 	struct spdk_io_channel *io_ch;
5530 	char io_buf[512];
5531 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5532 	struct ut_expected_io *expected_io;
5533 	int rc;
5534 
5535 	ut_init_bdev(NULL);
5536 
5537 	bdev = allocate_bdev("bdev0");
5538 	bdev->md_interleave = false;
5539 	bdev->md_len = 8;
5540 
5541 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5542 	CU_ASSERT(rc == 0);
5543 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5544 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5545 	io_ch = spdk_bdev_get_io_channel(desc);
5546 	CU_ASSERT(io_ch != NULL);
5547 
5548 	/* read */
5549 	g_io_done = false;
5550 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
5551 	if (ext_io_opts) {
5552 		expected_io->md_buf = ext_io_opts->metadata;
5553 	}
5554 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5555 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5556 
5557 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, ext_io_opts);
5558 
5559 	CU_ASSERT(rc == 0);
5560 	CU_ASSERT(g_io_done == false);
5561 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5562 	stub_complete_io(1);
5563 	CU_ASSERT(g_io_done == true);
5564 
5565 	/* write */
5566 	g_io_done = false;
5567 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
5568 	if (ext_io_opts) {
5569 		expected_io->md_buf = ext_io_opts->metadata;
5570 	}
5571 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5572 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5573 
5574 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, ext_io_opts);
5575 
5576 	CU_ASSERT(rc == 0);
5577 	CU_ASSERT(g_io_done == false);
5578 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5579 	stub_complete_io(1);
5580 	CU_ASSERT(g_io_done == true);
5581 
5582 	spdk_put_io_channel(io_ch);
5583 	spdk_bdev_close(desc);
5584 	free_bdev(bdev);
5585 	ut_fini_bdev();
5586 
5588 
5589 static void
5590 bdev_io_ext(void)
5591 {
5592 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5593 		.metadata = (void *)0xFF000000,
5594 		.size = sizeof(ext_io_opts)
5595 	};
5596 
5597 	_bdev_io_ext(&ext_io_opts);
5598 }
5599 
5600 static void
5601 bdev_io_ext_no_opts(void)
5602 {
5603 	_bdev_io_ext(NULL);
5604 }
5605 
5606 static void
5607 bdev_io_ext_invalid_opts(void)
5608 {
5609 	struct spdk_bdev *bdev;
5610 	struct spdk_bdev_desc *desc = NULL;
5611 	struct spdk_io_channel *io_ch;
5612 	char io_buf[512];
5613 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5614 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5615 		.metadata = (void *)0xFF000000,
5616 		.size = sizeof(ext_io_opts)
5617 	};
5618 	int rc;
5619 
5620 	ut_init_bdev(NULL);
5621 
5622 	bdev = allocate_bdev("bdev0");
5623 	bdev->md_interleave = false;
5624 	bdev->md_len = 8;
5625 
5626 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5627 	CU_ASSERT(rc == 0);
5628 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5629 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5630 	io_ch = spdk_bdev_get_io_channel(desc);
5631 	CU_ASSERT(io_ch != NULL);
5632 
5633 	/* Test invalid ext_opts size */
5634 	ext_io_opts.size = 0;
5635 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5636 	CU_ASSERT(rc == -EINVAL);
5637 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5638 	CU_ASSERT(rc == -EINVAL);
5639 
5640 	ext_io_opts.size = sizeof(ext_io_opts) * 2;
5641 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5642 	CU_ASSERT(rc == -EINVAL);
5643 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5644 	CU_ASSERT(rc == -EINVAL);
5645 
5646 	ext_io_opts.size = offsetof(struct spdk_bdev_ext_io_opts, metadata) +
5647 			   sizeof(ext_io_opts.metadata) - 1;
5648 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5649 	CU_ASSERT(rc == -EINVAL);
5650 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5651 	CU_ASSERT(rc == -EINVAL);
5652 
5653 	spdk_put_io_channel(io_ch);
5654 	spdk_bdev_close(desc);
5655 	free_bdev(bdev);
5656 	ut_fini_bdev();
5657 }
5658 
5659 static void
5660 bdev_io_ext_split(void)
5661 {
5662 	struct spdk_bdev *bdev;
5663 	struct spdk_bdev_desc *desc = NULL;
5664 	struct spdk_io_channel *io_ch;
5665 	char io_buf[512];
5666 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5667 	struct ut_expected_io *expected_io;
5668 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5669 		.metadata = (void *)0xFF000000,
5670 		.size = sizeof(ext_io_opts)
5671 	};
5672 	int rc;
5673 
5674 	ut_init_bdev(NULL);
5675 
5676 	bdev = allocate_bdev("bdev0");
5677 	bdev->md_interleave = false;
5678 	bdev->md_len = 8;
5679 
5680 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5681 	CU_ASSERT(rc == 0);
5682 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5683 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5684 	io_ch = spdk_bdev_get_io_channel(desc);
5685 	CU_ASSERT(io_ch != NULL);
5686 
5687 	/* Check that IO request with ext_opts and metadata is split correctly
5688 	 * Offset 14, length 8, payload 0xF000
5689 	 *  Child - Offset 14, length 2, payload 0xF000
5690 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
5691 	 */
5692 	bdev->optimal_io_boundary = 16;
5693 	bdev->split_on_optimal_io_boundary = true;
5694 	bdev->md_interleave = false;
5695 	bdev->md_len = 8;
5696 
5697 	iov.iov_base = (void *)0xF000;
5698 	iov.iov_len = 4096;
5699 	memset(&ext_io_opts, 0, sizeof(ext_io_opts));
5700 	ext_io_opts.metadata = (void *)0xFF000000;
5701 	ext_io_opts.size = sizeof(ext_io_opts);
5702 	g_io_done = false;
5703 
5704 	/* read */
5705 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
5706 	expected_io->md_buf = ext_io_opts.metadata;
5707 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
5708 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5709 
5710 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
5711 	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
5712 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
5713 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
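	/* The second child's md_buf is offset by 2 blocks * 8 bytes of metadata,
	 * matching the 2 data blocks consumed by the first child.
	 */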
5714 
5715 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
5716 	CU_ASSERT(rc == 0);
5717 	CU_ASSERT(g_io_done == false);
5718 
5719 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5720 	stub_complete_io(2);
5721 	CU_ASSERT(g_io_done == true);
5722 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5723 
5724 	/* write */
5725 	g_io_done = false;
5726 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
5727 	expected_io->md_buf = ext_io_opts.metadata;
5728 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
5729 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5730 
5731 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
5732 	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
5733 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
5734 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5735 
5736 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
5737 	CU_ASSERT(rc == 0);
5738 	CU_ASSERT(g_io_done == false);
5739 
5740 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5741 	stub_complete_io(2);
5742 	CU_ASSERT(g_io_done == true);
5743 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5744 
5745 	spdk_put_io_channel(io_ch);
5746 	spdk_bdev_close(desc);
5747 	free_bdev(bdev);
5748 	ut_fini_bdev();
5749 }
5750 
5751 static void
5752 bdev_io_ext_bounce_buffer(void)
5753 {
5754 	struct spdk_bdev *bdev;
5755 	struct spdk_bdev_desc *desc = NULL;
5756 	struct spdk_io_channel *io_ch;
5757 	char io_buf[512];
5758 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5759 	struct ut_expected_io *expected_io;
5760 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5761 		.metadata = (void *)0xFF000000,
5762 		.size = sizeof(ext_io_opts)
5763 	};
5764 	int rc;
5765 
5766 	ut_init_bdev(NULL);
5767 
5768 	bdev = allocate_bdev("bdev0");
5769 	bdev->md_interleave = false;
5770 	bdev->md_len = 8;
5771 
5772 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5773 	CU_ASSERT(rc == 0);
5774 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5775 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5776 	io_ch = spdk_bdev_get_io_channel(desc);
5777 	CU_ASSERT(io_ch != NULL);
5778 
5779 	/* Verify data pull/push.  The bdev doesn't support memory domains,
5780 	 * so bounce buffers from the bdev memory pool will be used. */
5781 	ext_io_opts.memory_domain = (struct spdk_memory_domain *)0xdeadbeef;
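	/* Since the stub bdev exposes no memory domains of its own, the bdev
	 * layer pulls the data out of the caller's domain into a bounce buffer
	 * on writes and pushes it back on read completion; the
	 * g_memory_domain_*_called flags below confirm this.
	 */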
5782 
5783 	/* read */
5784 	g_io_done = false;
5785 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
5786 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5787 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5788 
5789 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5790 
5791 	CU_ASSERT(rc == 0);
5792 	CU_ASSERT(g_io_done == false);
5793 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5794 	stub_complete_io(1);
5795 	CU_ASSERT(g_memory_domain_push_data_called == true);
5796 	CU_ASSERT(g_io_done == true);
5797 
5798 	/* write */
5799 	g_io_done = false;
5800 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
5801 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5802 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5803 
5804 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5805 
5806 	CU_ASSERT(rc == 0);
5807 	CU_ASSERT(g_memory_domain_pull_data_called == true);
5808 	CU_ASSERT(g_io_done == false);
5809 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5810 	stub_complete_io(1);
5811 	CU_ASSERT(g_io_done == true);
5812 
5813 	spdk_put_io_channel(io_ch);
5814 	spdk_bdev_close(desc);
5815 	free_bdev(bdev);
5816 	ut_fini_bdev();
5817 }
5818 
5819 static void
5820 bdev_register_uuid_alias(void)
5821 {
5822 	struct spdk_bdev *bdev, *second;
5823 	char uuid[SPDK_UUID_STRING_LEN];
5824 	int rc;
5825 
5826 	ut_init_bdev(NULL);
5827 	bdev = allocate_bdev("bdev0");
5828 
5829 	/* Make sure a UUID was generated */
5830 	CU_ASSERT_FALSE(spdk_mem_all_zero(&bdev->uuid, sizeof(bdev->uuid)));
5831 
5832 	/* Check that a UUID alias was registered */
5833 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
5834 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5835 
5836 	/* Unregister the bdev */
5837 	spdk_bdev_unregister(bdev, NULL, NULL);
5838 	poll_threads();
5839 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5840 
5841 	/* Check the same, but this time register the bdev with a non-zero UUID */
5842 	rc = spdk_bdev_register(bdev);
5843 	CU_ASSERT_EQUAL(rc, 0);
5844 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5845 
5846 	/* Unregister the bdev */
5847 	spdk_bdev_unregister(bdev, NULL, NULL);
5848 	poll_threads();
5849 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5850 
5851 	/* Register the bdev using its UUID as the name */
5852 	bdev->name = uuid;
5853 	rc = spdk_bdev_register(bdev);
5854 	CU_ASSERT_EQUAL(rc, 0);
5855 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5856 
5857 	/* Unregister the bdev */
5858 	spdk_bdev_unregister(bdev, NULL, NULL);
5859 	poll_threads();
5860 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5861 
5862 	/* Check that it's not possible to register two bdevs with the same UUID */
5863 	bdev->name = "bdev0";
5864 	second = allocate_bdev("bdev1");
5865 	spdk_uuid_copy(&bdev->uuid, &second->uuid);
5866 	rc = spdk_bdev_register(bdev);
5867 	CU_ASSERT_EQUAL(rc, -EEXIST);
5868 
5869 	/* Regenerate the UUID and re-check */
5870 	spdk_uuid_generate(&bdev->uuid);
5871 	rc = spdk_bdev_register(bdev);
5872 	CU_ASSERT_EQUAL(rc, 0);
5873 
5874 	/* And check that both bdevs can be retrieved through their UUIDs */
5875 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
5876 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5877 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &second->uuid);
5878 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), second);
5879 
5880 	free_bdev(second);
5881 	free_bdev(bdev);
5882 	ut_fini_bdev();
5883 }
5884 
5885 static void
5886 bdev_unregister_by_name(void)
5887 {
5888 	struct spdk_bdev *bdev;
5889 	int rc;
5890 
5891 	bdev = allocate_bdev("bdev");
5892 
5893 	g_event_type1 = 0xFF;
5894 	g_unregister_arg = NULL;
5895 	g_unregister_rc = -1;
5896 
5897 	rc = spdk_bdev_unregister_by_name("bdev1", &bdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5898 	CU_ASSERT(rc == -ENODEV);
5899 
5900 	rc = spdk_bdev_unregister_by_name("bdev", &vbdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5901 	CU_ASSERT(rc == -ENODEV);
5902 
5903 	rc = spdk_bdev_unregister_by_name("bdev", &bdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5904 	CU_ASSERT(rc == 0);
5905 
5906 	/* Check that unregister callback is delayed */
5907 	CU_ASSERT(g_unregister_arg == NULL);
5908 	CU_ASSERT(g_unregister_rc == -1);
5909 
5910 	poll_threads();
5911 
5912 	/* The event callback shall not be issued because the device was closed */
5913 	CU_ASSERT(g_event_type1 == 0xFF);
5914 	/* Unregister callback is issued */
5915 	CU_ASSERT(g_unregister_arg == (void *)0x12345678);
5916 	CU_ASSERT(g_unregister_rc == 0);
5917 
5918 	free_bdev(bdev);
5919 }
5920 
5921 static int
5922 count_bdevs(void *ctx, struct spdk_bdev *bdev)
5923 {
5924 	int *count = ctx;
5925 
5926 	(*count)++;
5927 
5928 	return 0;
5929 }
5930 
5931 static void
5932 for_each_bdev_test(void)
5933 {
5934 	struct spdk_bdev *bdev[8];
5935 	int rc, count;
5936 
5937 	bdev[0] = allocate_bdev("bdev0");
5938 	bdev[0]->internal.status = SPDK_BDEV_STATUS_REMOVING;
5939 
5940 	bdev[1] = allocate_bdev("bdev1");
5941 	rc = spdk_bdev_module_claim_bdev(bdev[1], NULL, &bdev_ut_if);
5942 	CU_ASSERT(rc == 0);
5943 
5944 	bdev[2] = allocate_bdev("bdev2");
5945 
5946 	bdev[3] = allocate_bdev("bdev3");
5947 	rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
5948 	CU_ASSERT(rc == 0);
5949 
5950 	bdev[4] = allocate_bdev("bdev4");
5951 
5952 	bdev[5] = allocate_bdev("bdev5");
5953 	rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
5954 	CU_ASSERT(rc == 0);
5955 
5956 	bdev[6] = allocate_bdev("bdev6");
5957 
5958 	bdev[7] = allocate_bdev("bdev7");
5959 
5960 	count = 0;
5961 	rc = spdk_for_each_bdev(&count, count_bdevs);
5962 	CU_ASSERT(rc == 0);
5963 	CU_ASSERT(count == 7);
5964 
5965 	count = 0;
5966 	rc = spdk_for_each_bdev_leaf(&count, count_bdevs);
5967 	CU_ASSERT(rc == 0);
5968 	CU_ASSERT(count == 4);
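	/* Arithmetic: 8 bdevs minus the one in REMOVING state leaves 7 visible;
	 * 3 of those are claimed, so 4 remain as leaves.
	 */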
5969 
5970 	bdev[0]->internal.status = SPDK_BDEV_STATUS_READY;
5971 	free_bdev(bdev[0]);
5972 	free_bdev(bdev[1]);
5973 	free_bdev(bdev[2]);
5974 	free_bdev(bdev[3]);
5975 	free_bdev(bdev[4]);
5976 	free_bdev(bdev[5]);
5977 	free_bdev(bdev[6]);
5978 	free_bdev(bdev[7]);
5979 }
5980 
5981 static void
5982 bdev_seek_test(void)
5983 {
5984 	struct spdk_bdev *bdev;
5985 	struct spdk_bdev_desc *desc = NULL;
5986 	struct spdk_io_channel *io_ch;
5987 	int rc;
5988 
5989 	ut_init_bdev(NULL);
5990 	poll_threads();
5991 
5992 	bdev = allocate_bdev("bdev0");
5993 
5994 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5995 	CU_ASSERT(rc == 0);
5996 	poll_threads();
5997 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5998 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5999 	io_ch = spdk_bdev_get_io_channel(desc);
6000 	CU_ASSERT(io_ch != NULL);
6001 
6002 	/* Seek data not supported */
6003 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_DATA, false);
6004 	rc = spdk_bdev_seek_data(desc, io_ch, 0, bdev_seek_cb, NULL);
6005 	CU_ASSERT(rc == 0);
6006 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6007 	poll_threads();
6008 	CU_ASSERT(g_seek_offset == 0);
6009 
6010 	/* Seek hole not supported */
6011 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_HOLE, false);
6012 	rc = spdk_bdev_seek_hole(desc, io_ch, 0, bdev_seek_cb, NULL);
6013 	CU_ASSERT(rc == 0);
6014 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6015 	poll_threads();
6016 	CU_ASSERT(g_seek_offset == UINT64_MAX);
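	/* When seek is unsupported, the bdev layer completes the request inline:
	 * seek_data falls back to offset 0 (the whole device is treated as data)
	 * and seek_hole falls back to UINT64_MAX (no hole is found).
	 */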
6017 
6018 	/* Seek data supported */
6019 	g_seek_data_offset = 12345;
6020 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_DATA, true);
6021 	rc = spdk_bdev_seek_data(desc, io_ch, 0, bdev_seek_cb, NULL);
6022 	CU_ASSERT(rc == 0);
6023 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6024 	stub_complete_io(1);
6025 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6026 	CU_ASSERT(g_seek_offset == 12345);
6027 
6028 	/* Seek hole supported */
6029 	g_seek_hole_offset = 67890;
6030 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_HOLE, true);
6031 	rc = spdk_bdev_seek_hole(desc, io_ch, 0, bdev_seek_cb, NULL);
6032 	CU_ASSERT(rc == 0);
6033 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6034 	stub_complete_io(1);
6035 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6036 	CU_ASSERT(g_seek_offset == 67890);
6037 
6038 	spdk_put_io_channel(io_ch);
6039 	spdk_bdev_close(desc);
6040 	free_bdev(bdev);
6041 	ut_fini_bdev();
6042 }
6043 
6044 static void
6045 bdev_copy(void)
6046 {
6047 	struct spdk_bdev *bdev;
6048 	struct spdk_bdev_desc *desc = NULL;
6049 	struct spdk_io_channel *ioch;
6050 	struct ut_expected_io *expected_io;
6051 	uint64_t src_offset, num_blocks;
6052 	uint32_t num_completed;
6053 	int rc;
6054 
6055 	ut_init_bdev(NULL);
6056 	bdev = allocate_bdev("bdev");
6057 
6058 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
6059 	CU_ASSERT_EQUAL(rc, 0);
6060 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6061 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
6062 	ioch = spdk_bdev_get_io_channel(desc);
6063 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
6064 
6065 	fn_table.submit_request = stub_submit_request;
6066 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
6067 
6068 	/* First test that if the bdev supports copy, the request won't be split */
6069 	bdev->md_len = 0;
6070 	bdev->blocklen = 512;
6071 	num_blocks = 128;
6072 	src_offset = bdev->blockcnt - num_blocks;
6073 
6074 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 0, src_offset, num_blocks);
6075 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6076 
6077 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6078 	CU_ASSERT_EQUAL(rc, 0);
6079 	num_completed = stub_complete_io(1);
6080 	CU_ASSERT_EQUAL(num_completed, 1);
6081 
6082 	/* Check that the copy still works when the bdev doesn't support it natively */
6083 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, src_offset, num_blocks, 0);
6084 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6085 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, num_blocks, 0);
6086 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6087 
6088 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, false);
6089 
6090 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6091 	CU_ASSERT_EQUAL(rc, 0);
6092 	num_completed = stub_complete_io(1);
6093 	CU_ASSERT_EQUAL(num_completed, 1);
6094 	num_completed = stub_complete_io(1);
6095 	CU_ASSERT_EQUAL(num_completed, 1);
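	/* The fallback path emits a read of the source range followed by a write
	 * to the destination, hence the two separate stub completions above.
	 */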
6096 
6097 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, true);
6098 	spdk_put_io_channel(ioch);
6099 	spdk_bdev_close(desc);
6100 	free_bdev(bdev);
6101 	ut_fini_bdev();
6102 }
6103 
6104 static void
6105 bdev_copy_split_test(void)
6106 {
6107 	struct spdk_bdev *bdev;
6108 	struct spdk_bdev_desc *desc = NULL;
6109 	struct spdk_io_channel *ioch;
6110 	struct spdk_bdev_channel *bdev_ch;
6111 	struct ut_expected_io *expected_io;
6112 	struct spdk_bdev_opts bdev_opts = {};
6113 	uint32_t i, num_outstanding;
6114 	uint64_t offset, src_offset, num_blocks, max_copy_blocks, num_children;
6115 	int rc;
6116 
6117 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
6118 	bdev_opts.bdev_io_pool_size = 512;
6119 	bdev_opts.bdev_io_cache_size = 64;
6120 	rc = spdk_bdev_set_opts(&bdev_opts);
6121 	CU_ASSERT(rc == 0);
6122 
6123 	ut_init_bdev(NULL);
6124 	bdev = allocate_bdev("bdev");
6125 
6126 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
6127 	CU_ASSERT_EQUAL(rc, 0);
6128 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6129 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
6130 	ioch = spdk_bdev_get_io_channel(desc);
6131 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
6132 	bdev_ch = spdk_io_channel_get_ctx(ioch);
6133 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
6134 
6135 	fn_table.submit_request = stub_submit_request;
6136 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
6137 
	/* Case 1: Test that the request won't be split */
6139 	num_blocks = 32;
6140 	src_offset = bdev->blockcnt - num_blocks;
6141 
6142 	g_io_done = false;
6143 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 0, src_offset, num_blocks);
6144 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6145 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6146 	CU_ASSERT_EQUAL(rc, 0);
6147 	CU_ASSERT(g_io_done == false);
6148 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6149 	stub_complete_io(1);
6150 	CU_ASSERT(g_io_done == true);
6151 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6152 
	/* Case 2: Test the split into 2 child requests */
6154 	max_copy_blocks = 8;
6155 	bdev->max_copy = max_copy_blocks;
6156 	num_children = 2;
6157 	num_blocks = max_copy_blocks * num_children;
6158 	offset = 0;
6159 	src_offset = bdev->blockcnt - num_blocks;
6160 
6161 	g_io_done = false;
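	/* Expect one child copy I/O per max_copy-sized chunk */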
6162 	for (i = 0; i < num_children; i++) {
6163 		expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, offset,
6164 							src_offset + offset, max_copy_blocks);
6165 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6166 		offset += max_copy_blocks;
6167 	}
6168 
6169 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6170 	CU_ASSERT_EQUAL(rc, 0);
6171 	CU_ASSERT(g_io_done == false);
6172 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_children);
6173 	stub_complete_io(num_children);
6174 	CU_ASSERT(g_io_done == true);
6175 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6176 
	/* Case 3: Test the split into 15 child requests; only 8 can be outstanding at a time */
6178 	num_children = 15;
6179 	num_blocks = max_copy_blocks * num_children;
6180 	offset = 0;
6181 	src_offset = bdev->blockcnt - num_blocks;
6182 
6183 	g_io_done = false;
6184 	for (i = 0; i < num_children; i++) {
6185 		expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, offset,
6186 							src_offset + offset, max_copy_blocks);
6187 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6188 		offset += max_copy_blocks;
6189 	}
6190 
6191 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6192 	CU_ASSERT_EQUAL(rc, 0);
6193 	CU_ASSERT(g_io_done == false);
6194 
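	/* At most SPDK_BDEV_MAX_CHILDREN_COPY_REQS child I/Os are outstanding at once; completing
	 * a batch allows the next batch to be submitted.
	 */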
6195 	while (num_children > 0) {
6196 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_COPY_REQS);
6197 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
6198 		stub_complete_io(num_outstanding);
6199 		num_children -= num_outstanding;
6200 	}
6201 	CU_ASSERT(g_io_done == true);
6202 
6203 	spdk_put_io_channel(ioch);
6204 	spdk_bdev_close(desc);
6205 	free_bdev(bdev);
6206 	ut_fini_bdev();
6207 }
6208 
6209 static void
6210 examine_claim_v1(struct spdk_bdev *bdev)
6211 {
6212 	int rc;
6213 
6214 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &vbdev_ut_if);
6215 	CU_ASSERT(rc == 0);
6216 }
6217 
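/* Used as an examine callback to verify that examine_config() and examine_disk() run without
 * any spinlocks held.
 */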
6218 static void
6219 examine_no_lock_held(struct spdk_bdev *bdev)
6220 {
6221 	CU_ASSERT(!spdk_spin_held(&g_bdev_mgr.spinlock));
6222 	CU_ASSERT(!spdk_spin_held(&bdev->internal.spinlock));
6223 }
6224 
6225 struct examine_claim_v2_ctx {
6226 	struct ut_examine_ctx examine_ctx;
6227 	enum spdk_bdev_claim_type claim_type;
6228 	struct spdk_bdev_desc *desc;
6229 };
6230 
6231 static void
6232 examine_claim_v2(struct spdk_bdev *bdev)
6233 {
6234 	struct examine_claim_v2_ctx *ctx = bdev->ctxt;
6235 	int rc;
6236 
6237 	rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, NULL, &ctx->desc);
6238 	CU_ASSERT(rc == 0);
6239 
6240 	rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, NULL, &vbdev_ut_if);
6241 	CU_ASSERT(rc == 0);
6242 }
6243 
6244 static void
6245 examine_locks(void)
6246 {
6247 	struct spdk_bdev *bdev;
6248 	struct ut_examine_ctx ctx = { 0 };
6249 	struct examine_claim_v2_ctx v2_ctx;
6250 
6251 	/* Without any claims, one code path is taken */
6252 	ctx.examine_config = examine_no_lock_held;
6253 	ctx.examine_disk = examine_no_lock_held;
6254 	bdev = allocate_bdev_ctx("bdev0", &ctx);
6255 	CU_ASSERT(ctx.examine_config_count == 1);
6256 	CU_ASSERT(ctx.examine_disk_count == 1);
6257 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6258 	CU_ASSERT(bdev->internal.claim.v1.module == NULL);
6259 	free_bdev(bdev);
6260 
6261 	/* Exercise another path that is taken when examine_config() takes a v1 claim. */
6262 	memset(&ctx, 0, sizeof(ctx));
6263 	ctx.examine_config = examine_claim_v1;
6264 	ctx.examine_disk = examine_no_lock_held;
6265 	bdev = allocate_bdev_ctx("bdev0", &ctx);
6266 	CU_ASSERT(ctx.examine_config_count == 1);
6267 	CU_ASSERT(ctx.examine_disk_count == 1);
6268 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6269 	CU_ASSERT(bdev->internal.claim.v1.module == &vbdev_ut_if);
6270 	spdk_bdev_module_release_bdev(bdev);
6271 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6272 	CU_ASSERT(bdev->internal.claim.v1.module == NULL);
6273 	free_bdev(bdev);
6274 
6275 	/* Exercise the final path that comes with v2 claims. */
6276 	memset(&v2_ctx, 0, sizeof(v2_ctx));
6277 	v2_ctx.examine_ctx.examine_config = examine_claim_v2;
6278 	v2_ctx.examine_ctx.examine_disk = examine_no_lock_held;
6279 	v2_ctx.claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6280 	bdev = allocate_bdev_ctx("bdev0", &v2_ctx);
6281 	CU_ASSERT(v2_ctx.examine_ctx.examine_config_count == 1);
6282 	CU_ASSERT(v2_ctx.examine_ctx.examine_disk_count == 1);
6283 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6284 	spdk_bdev_close(v2_ctx.desc);
6285 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6286 	free_bdev(bdev);
6287 }
6288 
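/* Count the claims in the bdev's v2 claim list and assert the count matches */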
6289 #define UT_ASSERT_CLAIM_V2_COUNT(bdev, expect) \
6290 	do { \
6291 		uint32_t len = 0; \
6292 		struct spdk_bdev_module_claim *claim; \
6293 		TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) { \
6294 			len++; \
6295 		} \
6296 		CU_ASSERT(len == expect); \
6297 	} while (0)
6298 
6299 static void
6300 claim_v2_rwo(void)
6301 {
6302 	struct spdk_bdev *bdev;
6303 	struct spdk_bdev_desc *desc;
6304 	struct spdk_bdev_desc *desc2;
6305 	struct spdk_bdev_claim_opts opts;
6306 	int rc;
6307 
6308 	bdev = allocate_bdev("bdev0");
6309 
6310 	/* Claim without options */
6311 	desc = NULL;
6312 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6313 	CU_ASSERT(rc == 0);
6314 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6315 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6316 					      &bdev_ut_if);
6317 	CU_ASSERT(rc == 0);
6318 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6320 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6321 	CU_ASSERT(strcmp(desc->claim->name, "") == 0);
6322 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6323 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6324 
6325 	/* Release the claim by closing the descriptor */
6326 	spdk_bdev_close(desc);
6327 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6328 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6329 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6330 
6331 	/* Claim with options */
6332 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6333 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6334 	desc = NULL;
6335 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6336 	CU_ASSERT(rc == 0);
6337 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6338 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, &opts,
6339 					      &bdev_ut_if);
6340 	CU_ASSERT(rc == 0);
6341 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6343 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6344 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
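	/* Clobbering opts must not affect the claim's stored copy of the name */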
6345 	memset(&opts, 0, sizeof(opts));
6346 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6347 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6348 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6349 
6350 	/* The claim blocks new writers. */
6351 	desc2 = NULL;
6352 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6353 	CU_ASSERT(rc == -EPERM);
6354 	CU_ASSERT(desc2 == NULL);
6355 
6356 	/* New readers are allowed */
6357 	desc2 = NULL;
6358 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6359 	CU_ASSERT(rc == 0);
6360 	CU_ASSERT(desc2 != NULL);
6361 	CU_ASSERT(!desc2->write);
6362 
6363 	/* No new v2 RWO claims are allowed */
6364 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6365 					      &bdev_ut_if);
6366 	CU_ASSERT(rc == -EPERM);
6367 
6368 	/* No new v2 ROM claims are allowed */
6369 	CU_ASSERT(!desc2->write);
6370 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6371 					      &bdev_ut_if);
6372 	CU_ASSERT(rc == -EPERM);
6373 	CU_ASSERT(!desc2->write);
6374 
6375 	/* No new v2 RWM claims are allowed */
6376 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6377 	opts.shared_claim_key = (uint64_t)&opts;
6378 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6379 					      &bdev_ut_if);
6380 	CU_ASSERT(rc == -EPERM);
6381 	CU_ASSERT(!desc2->write);
6382 
6383 	/* No new v1 claims are allowed */
6384 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6385 	CU_ASSERT(rc == -EPERM);
6386 
6387 	/* None of the above changed the existing claim */
6388 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6389 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6390 
	/* Closing the first descriptor releases its claim, so desc2 can now take the RWO claim
	 * and be promoted to read-write. */
6392 	spdk_bdev_close(desc);
6393 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6394 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6395 	CU_ASSERT(!desc2->write);
6396 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6397 					      &bdev_ut_if);
6398 	CU_ASSERT(rc == 0);
6399 	CU_ASSERT(desc2->claim != NULL);
6400 	CU_ASSERT(desc2->write);
6401 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
6402 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6403 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6404 	spdk_bdev_close(desc2);
6405 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6406 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6407 
6408 	/* Cannot claim with a key */
6409 	desc = NULL;
6410 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6411 	CU_ASSERT(rc == 0);
6412 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6413 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6414 	opts.shared_claim_key = (uint64_t)&opts;
6415 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, &opts,
6416 					      &bdev_ut_if);
6417 	CU_ASSERT(rc == -EINVAL);
6418 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6419 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6420 	spdk_bdev_close(desc);
6421 
6422 	/* Clean up */
6423 	free_bdev(bdev);
6424 }
6425 
6426 static void
6427 claim_v2_rom(void)
6428 {
6429 	struct spdk_bdev *bdev;
6430 	struct spdk_bdev_desc *desc;
6431 	struct spdk_bdev_desc *desc2;
6432 	struct spdk_bdev_claim_opts opts;
6433 	int rc;
6434 
6435 	bdev = allocate_bdev("bdev0");
6436 
6437 	/* Claim without options */
6438 	desc = NULL;
6439 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6440 	CU_ASSERT(rc == 0);
6441 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6442 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6443 					      &bdev_ut_if);
6444 	CU_ASSERT(rc == 0);
6445 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6446 	CU_ASSERT(desc->claim != NULL);
6447 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6448 	CU_ASSERT(strcmp(desc->claim->name, "") == 0);
6449 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6450 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6451 
6452 	/* Release the claim by closing the descriptor */
6453 	spdk_bdev_close(desc);
6454 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6455 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6457 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6458 
6459 	/* Claim with options */
6460 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6461 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6462 	desc = NULL;
6463 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6464 	CU_ASSERT(rc == 0);
6465 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6466 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, &opts,
6467 					      &bdev_ut_if);
6468 	CU_ASSERT(rc == 0);
6469 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6470 	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6471 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6472 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6473 	memset(&opts, 0, sizeof(opts));
6474 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6475 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6476 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6477 
6478 	/* The claim blocks new writers. */
6479 	desc2 = NULL;
6480 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6481 	CU_ASSERT(rc == -EPERM);
6482 	CU_ASSERT(desc2 == NULL);
6483 
6484 	/* New readers are allowed */
6485 	desc2 = NULL;
6486 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6487 	CU_ASSERT(rc == 0);
6488 	CU_ASSERT(desc2 != NULL);
6489 	CU_ASSERT(!desc2->write);
6490 
6491 	/* No new v2 RWO claims are allowed */
6492 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6493 					      &bdev_ut_if);
6494 	CU_ASSERT(rc == -EPERM);
6495 
6496 	/* No new v2 RWM claims are allowed */
6497 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6498 	opts.shared_claim_key = (uint64_t)&opts;
6499 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6500 					      &bdev_ut_if);
6501 	CU_ASSERT(rc == -EPERM);
6502 	CU_ASSERT(!desc2->write);
6503 
6504 	/* No new v1 claims are allowed */
6505 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6506 	CU_ASSERT(rc == -EPERM);
6507 
	/* None of the above changed the existing claim */
6509 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6510 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6511 
6512 	/* New v2 ROM claims are allowed and the descriptor stays read-only. */
6513 	CU_ASSERT(!desc2->write);
6514 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6515 					      &bdev_ut_if);
6516 	CU_ASSERT(rc == 0);
6517 	CU_ASSERT(!desc2->write);
6518 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6519 	CU_ASSERT(TAILQ_NEXT(desc->claim, link) == desc2->claim);
6520 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 2);
6521 
6522 	/* Claim remains when closing the first descriptor */
6523 	spdk_bdev_close(desc);
6524 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6525 	CU_ASSERT(!TAILQ_EMPTY(&bdev->internal.open_descs));
6526 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6527 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6528 
6529 	/* Claim removed when closing the other descriptor */
6530 	spdk_bdev_close(desc2);
6531 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6532 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6533 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6534 
6535 	/* Cannot claim with a key */
6536 	desc = NULL;
6537 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6538 	CU_ASSERT(rc == 0);
6539 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6540 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6541 	opts.shared_claim_key = (uint64_t)&opts;
6542 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, &opts,
6543 					      &bdev_ut_if);
6544 	CU_ASSERT(rc == -EINVAL);
6545 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6546 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6547 	spdk_bdev_close(desc);
6548 
6549 	/* Cannot claim with a read-write descriptor */
6550 	desc = NULL;
6551 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6552 	CU_ASSERT(rc == 0);
6553 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6554 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6555 					      &bdev_ut_if);
6556 	CU_ASSERT(rc == -EINVAL);
6557 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6558 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6559 	spdk_bdev_close(desc);
6560 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6561 
6562 	/* Clean up */
6563 	free_bdev(bdev);
6564 }
6565 
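/*
 * READ_MANY_WRITE_SHARED allows multiple writers, provided each takes the claim with the same
 * shared_claim_key.
 */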
6566 static void
6567 claim_v2_rwm(void)
6568 {
6569 	struct spdk_bdev *bdev;
6570 	struct spdk_bdev_desc *desc;
6571 	struct spdk_bdev_desc *desc2;
6572 	struct spdk_bdev_claim_opts opts;
6573 	char good_key, bad_key;
6574 	int rc;
6575 
6576 	bdev = allocate_bdev("bdev0");
6577 
6578 	/* Claim without options should fail */
6579 	desc = NULL;
6580 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6581 	CU_ASSERT(rc == 0);
6582 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6583 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, NULL,
6584 					      &bdev_ut_if);
6585 	CU_ASSERT(rc == -EINVAL);
6586 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6587 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6588 	CU_ASSERT(desc->claim == NULL);
6589 
6590 	/* Claim with options */
6591 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6592 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6593 	opts.shared_claim_key = (uint64_t)&good_key;
6594 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6595 					      &bdev_ut_if);
6596 	CU_ASSERT(rc == 0);
6597 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
6598 	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6599 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6600 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6601 	memset(&opts, 0, sizeof(opts));
6602 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6603 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6604 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6605 
6606 	/* The claim blocks new writers. */
6607 	desc2 = NULL;
6608 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6609 	CU_ASSERT(rc == -EPERM);
6610 	CU_ASSERT(desc2 == NULL);
6611 
6612 	/* New readers are allowed */
6613 	desc2 = NULL;
6614 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6615 	CU_ASSERT(rc == 0);
6616 	CU_ASSERT(desc2 != NULL);
6617 	CU_ASSERT(!desc2->write);
6618 
6619 	/* No new v2 RWO claims are allowed */
6620 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6621 					      &bdev_ut_if);
6622 	CU_ASSERT(rc == -EPERM);
6623 
6624 	/* No new v2 ROM claims are allowed and the descriptor stays read-only. */
6625 	CU_ASSERT(!desc2->write);
6626 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6627 					      &bdev_ut_if);
6628 	CU_ASSERT(rc == -EPERM);
6629 	CU_ASSERT(!desc2->write);
6630 
6631 	/* No new v1 claims are allowed */
6632 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6633 	CU_ASSERT(rc == -EPERM);
6634 
6635 	/* No new v2 RWM claims are allowed if the key does not match */
6636 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6637 	opts.shared_claim_key = (uint64_t)&bad_key;
6638 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6639 					      &bdev_ut_if);
6640 	CU_ASSERT(rc == -EPERM);
6641 	CU_ASSERT(!desc2->write);
6642 
	/* None of the above changed the existing claim */
6644 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6645 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6646 
6647 	/* New v2 RWM claims are allowed and the descriptor is promoted if the key matches. */
6648 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6649 	opts.shared_claim_key = (uint64_t)&good_key;
6650 	CU_ASSERT(!desc2->write);
6651 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6652 					      &bdev_ut_if);
6653 	CU_ASSERT(rc == 0);
6654 	CU_ASSERT(desc2->write);
6655 	CU_ASSERT(TAILQ_NEXT(desc->claim, link) == desc2->claim);
6656 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 2);
6657 
6658 	/* Claim remains when closing the first descriptor */
6659 	spdk_bdev_close(desc);
6660 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
6661 	CU_ASSERT(!TAILQ_EMPTY(&bdev->internal.open_descs));
6662 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6663 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6664 
6665 	/* Claim removed when closing the other descriptor */
6666 	spdk_bdev_close(desc2);
6667 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6668 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6669 
6670 	/* Cannot claim without a key */
6671 	desc = NULL;
6672 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6673 	CU_ASSERT(rc == 0);
6674 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6675 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6676 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6677 					      &bdev_ut_if);
6678 	CU_ASSERT(rc == -EINVAL);
6679 	spdk_bdev_close(desc);
6680 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6681 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6682 
6683 	/* Clean up */
6684 	free_bdev(bdev);
6685 }
6686 
6687 static void
6688 claim_v2_existing_writer(void)
6689 {
6690 	struct spdk_bdev *bdev;
6691 	struct spdk_bdev_desc *desc;
6692 	struct spdk_bdev_desc *desc2;
6693 	struct spdk_bdev_claim_opts opts;
6694 	enum spdk_bdev_claim_type type;
6695 	enum spdk_bdev_claim_type types[] = {
6696 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6697 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6698 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6699 	};
6700 	size_t i;
6701 	int rc;
6702 
6703 	bdev = allocate_bdev("bdev0");
6704 
6705 	desc = NULL;
6706 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6707 	CU_ASSERT(rc == 0);
6708 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6709 	desc2 = NULL;
6710 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6711 	CU_ASSERT(rc == 0);
6712 	SPDK_CU_ASSERT_FATAL(desc2 != NULL);
6713 
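	/*
	 * With two writers open, no v2 claim can succeed: ROM claims require a read-only
	 * descriptor (-EINVAL), and RWO and RWM claims fail because of the other open
	 * writer (-EPERM).
	 */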
6714 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6715 		type = types[i];
6716 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6717 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6718 			opts.shared_claim_key = (uint64_t)&opts;
6719 		}
6720 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6721 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE) {
6722 			CU_ASSERT(rc == -EINVAL);
6723 		} else {
6724 			CU_ASSERT(rc == -EPERM);
6725 		}
6726 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6727 		rc = spdk_bdev_module_claim_bdev_desc(desc2, type, &opts, &bdev_ut_if);
6728 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE) {
6729 			CU_ASSERT(rc == -EINVAL);
6730 		} else {
6731 			CU_ASSERT(rc == -EPERM);
6732 		}
6733 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6734 	}
6735 
6736 	spdk_bdev_close(desc);
6737 	spdk_bdev_close(desc2);
6738 
6739 	/* Clean up */
6740 	free_bdev(bdev);
6741 }
6742 
6743 static void
6744 claim_v2_existing_v1(void)
6745 {
6746 	struct spdk_bdev *bdev;
6747 	struct spdk_bdev_desc *desc;
6748 	struct spdk_bdev_claim_opts opts;
6749 	enum spdk_bdev_claim_type type;
6750 	enum spdk_bdev_claim_type types[] = {
6751 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6752 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6753 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6754 	};
6755 	size_t i;
6756 	int rc;
6757 
6758 	bdev = allocate_bdev("bdev0");
6759 
6760 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6761 	CU_ASSERT(rc == 0);
6762 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6763 
6764 	desc = NULL;
6765 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6766 	CU_ASSERT(rc == 0);
6767 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6768 
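	/* While the v1 claim is held, every v2 claim type is rejected */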
6769 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6770 		type = types[i];
6771 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6772 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6773 			opts.shared_claim_key = (uint64_t)&opts;
6774 		}
6775 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6776 		CU_ASSERT(rc == -EPERM);
6777 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6778 	}
6779 
6780 	spdk_bdev_module_release_bdev(bdev);
6781 	spdk_bdev_close(desc);
6782 
6783 	/* Clean up */
6784 	free_bdev(bdev);
6785 }
6786 
6787 static void
6788 claim_v1_existing_v2(void)
6789 {
6790 	struct spdk_bdev *bdev;
6791 	struct spdk_bdev_desc *desc;
6792 	struct spdk_bdev_claim_opts opts;
6793 	enum spdk_bdev_claim_type type;
6794 	enum spdk_bdev_claim_type types[] = {
6795 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6796 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6797 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6798 	};
6799 	size_t i;
6800 	int rc;
6801 
6802 	bdev = allocate_bdev("bdev0");
6803 
6804 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6805 		type = types[i];
6806 
6807 		desc = NULL;
6808 		rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6809 		CU_ASSERT(rc == 0);
6810 		SPDK_CU_ASSERT_FATAL(desc != NULL);
6811 
6812 		/* Get a v2 claim */
6813 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6814 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6815 			opts.shared_claim_key = (uint64_t)&opts;
6816 		}
6817 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6818 		CU_ASSERT(rc == 0);
6819 
6820 		/* Fail to get a v1 claim */
6821 		rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6822 		CU_ASSERT(rc == -EPERM);
6823 
6824 		spdk_bdev_close(desc);
6825 
6826 		/* Now v1 succeeds */
6827 		rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
		CU_ASSERT(rc == 0);
6829 		spdk_bdev_module_release_bdev(bdev);
6830 	}
6831 
6832 	/* Clean up */
6833 	free_bdev(bdev);
6834 }
6835 
6836 static void ut_examine_claimed_config0(struct spdk_bdev *bdev);
6837 static void ut_examine_claimed_disk0(struct spdk_bdev *bdev);
6838 static void ut_examine_claimed_config1(struct spdk_bdev *bdev);
6839 static void ut_examine_claimed_disk1(struct spdk_bdev *bdev);
6840 
6841 #define UT_MAX_EXAMINE_MODS 2
6842 struct spdk_bdev_module examine_claimed_mods[UT_MAX_EXAMINE_MODS] = {
6843 	{
6844 		.name = "vbdev_ut_examine0",
6845 		.module_init = vbdev_ut_module_init,
6846 		.module_fini = vbdev_ut_module_fini,
6847 		.examine_config = ut_examine_claimed_config0,
6848 		.examine_disk = ut_examine_claimed_disk0,
6849 	},
6850 	{
6851 		.name = "vbdev_ut_examine1",
6852 		.module_init = vbdev_ut_module_init,
6853 		.module_fini = vbdev_ut_module_fini,
6854 		.examine_config = ut_examine_claimed_config1,
6855 		.examine_disk = ut_examine_claimed_disk1,
6856 	}
6857 };
6858 
6859 SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed0, &examine_claimed_mods[0])
6860 SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed1, &examine_claimed_mods[1])
6861 
6862 struct ut_examine_claimed_ctx {
6863 	uint32_t examine_config_count;
6864 	uint32_t examine_disk_count;
6865 
6866 	/* Claim type to take, with these options */
6867 	enum spdk_bdev_claim_type claim_type;
6868 	struct spdk_bdev_claim_opts claim_opts;
6869 
6870 	/* Expected return value from spdk_bdev_module_claim_bdev_desc() */
6871 	int expect_claim_err;
6872 
6873 	/* Descriptor used for a claim */
6874 	struct spdk_bdev_desc *desc;
6875 } examine_claimed_ctx[UT_MAX_EXAMINE_MODS];
6876 
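/* When false, the examine callbacks above complete immediately so other tests are unaffected */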
6877 bool ut_testing_examine_claimed;
6878 
6879 static void
6880 reset_examine_claimed_ctx(void)
6881 {
6882 	struct ut_examine_claimed_ctx *ctx;
6883 	uint32_t i;
6884 
6885 	for (i = 0; i < SPDK_COUNTOF(examine_claimed_ctx); i++) {
6886 		ctx = &examine_claimed_ctx[i];
6887 		if (ctx->desc != NULL) {
6888 			spdk_bdev_close(ctx->desc);
6889 		}
6890 		memset(ctx, 0, sizeof(*ctx));
6891 		spdk_bdev_claim_opts_init(&ctx->claim_opts, sizeof(ctx->claim_opts));
6892 	}
6893 }
6894 
6895 static void
6896 examine_claimed_config(struct spdk_bdev *bdev, uint32_t modnum)
6897 {
	struct spdk_bdev_module *module;
	struct ut_examine_claimed_ctx *ctx;
	int rc;

	SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);
	module = &examine_claimed_mods[modnum];
	ctx = &examine_claimed_ctx[modnum];
6902 
6903 	if (!ut_testing_examine_claimed) {
6904 		spdk_bdev_module_examine_done(module);
6905 		return;
6906 	}
6907 
6908 	ctx->examine_config_count++;
6909 
6910 	if (ctx->claim_type != SPDK_BDEV_CLAIM_NONE) {
		rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, NULL, &ctx->desc);
6913 		CU_ASSERT(rc == 0);
6914 
		rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, &ctx->claim_opts,
						      module);
6916 		CU_ASSERT(rc == ctx->expect_claim_err);
6917 	}
6918 	spdk_bdev_module_examine_done(module);
6919 }
6920 
6921 static void
6922 ut_examine_claimed_config0(struct spdk_bdev *bdev)
6923 {
6924 	examine_claimed_config(bdev, 0);
6925 }
6926 
6927 static void
6928 ut_examine_claimed_config1(struct spdk_bdev *bdev)
6929 {
6930 	examine_claimed_config(bdev, 1);
6931 }
6932 
6933 static void
6934 examine_claimed_disk(struct spdk_bdev *bdev, uint32_t modnum)
6935 {
	struct spdk_bdev_module *module;
	struct ut_examine_claimed_ctx *ctx;

	SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);
	module = &examine_claimed_mods[modnum];
	ctx = &examine_claimed_ctx[modnum];
6939 
6940 	if (!ut_testing_examine_claimed) {
6941 		spdk_bdev_module_examine_done(module);
6942 		return;
6943 	}
6944 
6945 	ctx->examine_disk_count++;
6946 
6947 	spdk_bdev_module_examine_done(module);
6948 }
6949 
6950 static void
6951 ut_examine_claimed_disk0(struct spdk_bdev *bdev)
6952 {
6953 	examine_claimed_disk(bdev, 0);
6954 }
6955 
6956 static void
6957 ut_examine_claimed_disk1(struct spdk_bdev *bdev)
6958 {
6959 	examine_claimed_disk(bdev, 1);
6960 }
6961 
6962 static void
6963 examine_claimed(void)
6964 {
6965 	struct spdk_bdev *bdev;
6966 	struct spdk_bdev_module *mod = examine_claimed_mods;
6967 	struct ut_examine_claimed_ctx *ctx = examine_claimed_ctx;
6968 
6969 	ut_testing_examine_claimed = true;
6970 	reset_examine_claimed_ctx();
6971 
6972 	/*
6973 	 * With one module claiming, both modules' examine_config should be called, but only the
6974 	 * claiming module's examine_disk should be called.
6975 	 */
6976 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6977 	bdev = allocate_bdev("bdev0");
6978 	CU_ASSERT(ctx[0].examine_config_count == 1);
6979 	CU_ASSERT(ctx[0].examine_disk_count == 1);
6980 	SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
6981 	CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
6982 	CU_ASSERT(ctx[1].examine_config_count == 1);
6983 	CU_ASSERT(ctx[1].examine_disk_count == 0);
6984 	CU_ASSERT(ctx[1].desc == NULL);
6985 	reset_examine_claimed_ctx();
6986 	free_bdev(bdev);
6987 
6988 	/*
6989 	 * With two modules claiming, both modules' examine_config and examine_disk should be
6990 	 * called.
6991 	 */
6992 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6993 	ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6994 	bdev = allocate_bdev("bdev0");
6995 	CU_ASSERT(ctx[0].examine_config_count == 1);
6996 	CU_ASSERT(ctx[0].examine_disk_count == 1);
6997 	SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
6998 	CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
6999 	CU_ASSERT(ctx[1].examine_config_count == 1);
7000 	CU_ASSERT(ctx[1].examine_disk_count == 1);
7001 	SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
7002 	CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
7003 	reset_examine_claimed_ctx();
7004 	free_bdev(bdev);
7005 
	/*
	 * If two vbdev modules try to claim with conflicting claim types, the module that was
	 * registered last wins: modules with examine callbacks are inserted at the head of the
	 * module list, so vbdev_ut_examine1 runs examine_config() before vbdev_ut_examine0 and
	 * takes its claim first. The winner gets the claim and is the only one that has its
	 * examine_disk callback invoked.
	 */
7011 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
7012 	ctx[0].expect_claim_err = -EPERM;
7013 	ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE;
7014 	bdev = allocate_bdev("bdev0");
7015 	CU_ASSERT(ctx[0].examine_config_count == 1);
7016 	CU_ASSERT(ctx[0].examine_disk_count == 0);
7017 	CU_ASSERT(ctx[1].examine_config_count == 1);
7018 	CU_ASSERT(ctx[1].examine_disk_count == 1);
7019 	SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
7020 	CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
7021 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
7022 	reset_examine_claimed_ctx();
7023 	free_bdev(bdev);
7024 
7025 	ut_testing_examine_claimed = false;
7026 }
7027 
7028 int
7029 main(int argc, char **argv)
7030 {
7031 	CU_pSuite		suite = NULL;
7032 	unsigned int		num_failures;
7033 
7034 	CU_set_error_action(CUEA_ABORT);
7035 	CU_initialize_registry();
7036 
7037 	suite = CU_add_suite("bdev", ut_bdev_setup, ut_bdev_teardown);
7038 
7039 	CU_ADD_TEST(suite, bytes_to_blocks_test);
7040 	CU_ADD_TEST(suite, num_blocks_test);
7041 	CU_ADD_TEST(suite, io_valid_test);
7042 	CU_ADD_TEST(suite, open_write_test);
7043 	CU_ADD_TEST(suite, claim_test);
7044 	CU_ADD_TEST(suite, alias_add_del_test);
7045 	CU_ADD_TEST(suite, get_device_stat_test);
7046 	CU_ADD_TEST(suite, bdev_io_types_test);
7047 	CU_ADD_TEST(suite, bdev_io_wait_test);
7048 	CU_ADD_TEST(suite, bdev_io_spans_split_test);
7049 	CU_ADD_TEST(suite, bdev_io_boundary_split_test);
7050 	CU_ADD_TEST(suite, bdev_io_max_size_and_segment_split_test);
7051 	CU_ADD_TEST(suite, bdev_io_mix_split_test);
7052 	CU_ADD_TEST(suite, bdev_io_split_with_io_wait);
7053 	CU_ADD_TEST(suite, bdev_io_write_unit_split_test);
7054 	CU_ADD_TEST(suite, bdev_io_alignment_with_boundary);
7055 	CU_ADD_TEST(suite, bdev_io_alignment);
7056 	CU_ADD_TEST(suite, bdev_histograms);
7057 	CU_ADD_TEST(suite, bdev_write_zeroes);
7058 	CU_ADD_TEST(suite, bdev_compare_and_write);
7059 	CU_ADD_TEST(suite, bdev_compare);
7060 	CU_ADD_TEST(suite, bdev_compare_emulated);
7061 	CU_ADD_TEST(suite, bdev_zcopy_write);
7062 	CU_ADD_TEST(suite, bdev_zcopy_read);
7063 	CU_ADD_TEST(suite, bdev_open_while_hotremove);
7064 	CU_ADD_TEST(suite, bdev_close_while_hotremove);
7065 	CU_ADD_TEST(suite, bdev_open_ext);
7066 	CU_ADD_TEST(suite, bdev_open_ext_unregister);
7067 	CU_ADD_TEST(suite, bdev_set_io_timeout);
7068 	CU_ADD_TEST(suite, bdev_set_qd_sampling);
7069 	CU_ADD_TEST(suite, lba_range_overlap);
7070 	CU_ADD_TEST(suite, lock_lba_range_check_ranges);
7071 	CU_ADD_TEST(suite, lock_lba_range_with_io_outstanding);
7072 	CU_ADD_TEST(suite, lock_lba_range_overlapped);
7073 	CU_ADD_TEST(suite, bdev_io_abort);
7074 	CU_ADD_TEST(suite, bdev_unmap);
7075 	CU_ADD_TEST(suite, bdev_write_zeroes_split_test);
7076 	CU_ADD_TEST(suite, bdev_set_options_test);
7077 	CU_ADD_TEST(suite, bdev_multi_allocation);
7078 	CU_ADD_TEST(suite, bdev_get_memory_domains);
7079 	CU_ADD_TEST(suite, bdev_io_ext);
7080 	CU_ADD_TEST(suite, bdev_io_ext_no_opts);
7081 	CU_ADD_TEST(suite, bdev_io_ext_invalid_opts);
7082 	CU_ADD_TEST(suite, bdev_io_ext_split);
7083 	CU_ADD_TEST(suite, bdev_io_ext_bounce_buffer);
7084 	CU_ADD_TEST(suite, bdev_register_uuid_alias);
7085 	CU_ADD_TEST(suite, bdev_unregister_by_name);
7086 	CU_ADD_TEST(suite, for_each_bdev_test);
7087 	CU_ADD_TEST(suite, bdev_seek_test);
7088 	CU_ADD_TEST(suite, bdev_copy);
7089 	CU_ADD_TEST(suite, bdev_copy_split_test);
7090 	CU_ADD_TEST(suite, examine_locks);
7091 	CU_ADD_TEST(suite, claim_v2_rwo);
7092 	CU_ADD_TEST(suite, claim_v2_rom);
7093 	CU_ADD_TEST(suite, claim_v2_rwm);
7094 	CU_ADD_TEST(suite, claim_v2_existing_writer);
7095 	CU_ADD_TEST(suite, claim_v2_existing_v1);
7096 	CU_ADD_TEST(suite, claim_v1_existing_v2);
7097 	CU_ADD_TEST(suite, examine_claimed);
7098 
7099 	allocate_cores(1);
7100 	allocate_threads(1);
7101 	set_thread(0);
7102 
7103 	CU_basic_set_mode(CU_BRM_VERBOSE);
7104 	CU_basic_run_tests();
7105 	num_failures = CU_get_number_of_failures();
7106 	CU_cleanup_registry();
7107 
7108 	free_threads();
7109 	free_cores();
7110 
7111 	return num_failures;
7112 }
7113